From cb79a180f2e7eb51de5a4848652893197637bccb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Nov 2017 20:30:49 +0100 Subject: [PATCH 01/31] xfrm: defer daddr pointer assignment after spi parsing syzbot reports: BUG: KASAN: use-after-free in __xfrm_state_lookup+0x695/0x6b0 Read of size 4 at addr ffff8801d434e538 by task syzkaller647520/2991 [..] __xfrm_state_lookup+0x695/0x6b0 net/xfrm/xfrm_state.c:833 xfrm_state_lookup+0x8a/0x160 net/xfrm/xfrm_state.c:1592 xfrm_input+0x8e5/0x22f0 net/xfrm/xfrm_input.c:302 The use-after-free is the ipv4 destination address, which points to an skb head area that has been reallocated: pskb_expand_head+0x36b/0x1210 net/core/skbuff.c:1494 __pskb_pull_tail+0x14a/0x17c0 net/core/skbuff.c:1877 pskb_may_pull include/linux/skbuff.h:2102 [inline] xfrm_parse_spi+0x3d3/0x4d0 net/xfrm/xfrm_input.c:170 xfrm_input+0xce2/0x22f0 net/xfrm/xfrm_input.c:291 so the real bug is that xfrm_parse_spi() uses pskb_may_pull, but for now do smaller workaround that makes xfrm_input fetch daddr after spi parsing. 
Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 8ac9d32fb79d..1c6051cb7733 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -265,8 +265,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; /* if tunnel is present override skb->mark value with tunnel i_key */ @@ -293,6 +291,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); do { if (skb->sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); From cf37966751747727629fe51fd4a1d4edd8457c60 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 16:46:01 +0100 Subject: [PATCH 02/31] xfrm: do unconditional template resolution before pcpu cache check Stephen Smalley says: Since 4.14-rc1, the selinux-testsuite has been encountering sporadic failures during testing of labeled IPSEC. git bisect pointed to commit ec30d ("xfrm: add xdst pcpu cache"). The xdst pcpu cache is only checking that the policies are the same, but does not validate that the policy, state, and flow match with respect to security context labeling. As a result, the wrong SA could be used and the receiver could end up performing permission checking and providing SO_PEERSEC or SCM_SECURITY values for the wrong security context. This fix makes it so that we always do the template resolution, and then checks that the found states match those in the pcpu bundle. 
This has the disadvantage of doing a bit more work (lookup in state hash table) if we can reuse the xdst entry (we only avoid xdst alloc/free) but we don't add a lot of extra work in case we can't reuse. xfrm_pol_dead() check is removed, reasoning is that xfrm_tmpl_resolve does all needed checks. Cc: Paul Moore Fixes: ec30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache") Reported-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Florian Westphal Acked-by: Paul Moore Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 50 +++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8cafb3c0a4ac..a2e531bf4f97 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1787,19 +1787,23 @@ void xfrm_policy_cache_flush(void) put_online_cpus(); } -static bool xfrm_pol_dead(struct xfrm_dst *xdst) +static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, + struct xfrm_state * const xfrm[], + int num) { - unsigned int num_pols = xdst->num_pols; - unsigned int pol_dead = 0, i; + const struct dst_entry *dst = &xdst->u.dst; + int i; - for (i = 0; i < num_pols; i++) - pol_dead |= xdst->pols[i]->walk.dead; + if (xdst->num_xfrms != num) + return false; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - if (pol_dead) - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + for (i = 0; i < num; i++) { + if (!dst || dst->xfrm != xfrm[i]) + return false; + dst = dst->child; + } - return pol_dead; + return xfrm_bundle_ok(xdst); } static struct xfrm_dst * @@ -1813,19 +1817,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, struct dst_entry *dst; int err; - xdst = this_cpu_read(xfrm_last_dst); - if (xdst && - xdst->u.dst.dev == dst_orig->dev && - xdst->num_pols == num_pols && - !xfrm_pol_dead(xdst) && - memcmp(xdst->pols, pols, - sizeof(struct xfrm_policy *) * num_pols) == 0 && - xfrm_bundle_ok(xdst)) { - 
dst_hold(&xdst->u.dst); - return xdst; - } - - old = xdst; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { @@ -1834,6 +1825,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return ERR_PTR(err); } + xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0 && + xfrm_xdst_can_reuse(xdst, xfrm, err)) { + dst_hold(&xdst->u.dst); + while (err > 0) + xfrm_state_put(xfrm[--err]); + return xdst; + } + + old = xdst; + dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); From c9f3f813d462c72dbe412cee6a5cbacf13c4ad5e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 2 Nov 2017 08:10:17 +0100 Subject: [PATCH 03/31] xfrm: Fix stack-out-of-bounds read in xfrm_state_find. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by using the addresses from the template unconditionally. 
Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a2e531bf4f97..6eb228a70131 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1361,36 +1361,29 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct net *net = xp_net(policy); int nx; int i, error; - xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); - xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; - xfrm_address_t *remote = daddr; - xfrm_address_t *local = saddr; + xfrm_address_t *local; + xfrm_address_t *remote; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode == XFRM_MODE_TUNNEL || - tmpl->mode == XFRM_MODE_BEET) { - remote = &tmpl->id.daddr; - local = &tmpl->saddr; - if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, - &tmp, remote, - tmpl->encap_family, 0); - if (error) - goto fail; - local = &tmp; - } + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family, 0); + if (error) + goto fail; + local = &tmp; } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; - daddr = remote; - saddr = local; continue; } if (x) { From 24de79e5008a928beb2c7ccc2396f15065613363 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 2 Nov 2017 19:26:22 +0530 Subject: [PATCH 04/31] cxgb4: update latest firmware version supported Change t4fw_version.h to update latest firmware version number to 1.16.63.0. Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index f2d623a7aee0..123e2c1b65f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -37,7 +37,7 @@ #define T4FW_VERSION_MAJOR 0x01 #define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x2D +#define T4FW_VERSION_MICRO 0x3F #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -46,7 +46,7 @@ #define T5FW_VERSION_MAJOR 0x01 #define T5FW_VERSION_MINOR 0x10 -#define T5FW_VERSION_MICRO 0x2D +#define T5FW_VERSION_MICRO 0x3F #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -55,7 +55,7 @@ #define T6FW_VERSION_MAJOR 0x01 #define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x2D +#define T6FW_VERSION_MICRO 0x3F #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 From 2b5ec1a5f9738ee7bf8f5ec0526e75e00362c48f Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Thu, 26 Oct 2017 16:57:05 +0800 Subject: [PATCH 05/31] netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed When ipvs runs in two different network namespaces on the same host, and one ipvs forwards network traffic to the ipvs in the other network namespace, the 'ipvs_property' flag will make the second ipvs have no effect. So we should clear 'ipvs_property' when the SKB's network namespace changes. Fixes: 621e84d6f373 ("dev: introduce skb_scrub_packet()") Signed-off-by: Ye Yin Signed-off-by: Wei Zhou Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 7 +++++++ net/core/skbuff.c | 1 + 2 files changed, 8 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72299ef00061..d448a4804aea 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3770,6 +3770,13 @@ static inline void nf_reset_trace(struct sk_buff *skb) #endif } +static inline void ipvs_reset(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IP_VS) + skb->ipvs_property = 0; +#endif +} + /* Note: This doesn't put any conntrack and bridge info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 24656076906d..e140ba49b30a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4864,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) if (!xnet) return; + ipvs_reset(skb); skb_orphan(skb); skb->mark = 0; } From baedf68a068ca29624f241426843635920f16e1d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 2 Nov 2017 21:26:59 +0100 Subject: [PATCH 06/31] net: usb: asix: fill null-ptr-deref in asix_suspend When asix_suspend() is called dev->driver_priv might not have been assigned a value, so we need to check that it's not NULL. Found by syzkaller. 
kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bb36300 task.stack: ffff88006bba8000 RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629 RSP: 0018:ffff88006bbae718 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644 RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008 RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40 R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80 FS: 0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0 Call Trace: usb_suspend_interface drivers/usb/core/driver.c:1209 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: 8d 
7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00 00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718 ---[ end trace dfc4f5649284342c ]--- Signed-off-by: Andrey Konovalov Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b2ff88e69a81..743416be84f3 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->suspend) + if (priv && priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); From 8f7dc9ae4a7aece9fbc3e6637bdfa38b36bcdf09 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 3 Nov 2017 16:49:00 +0100 Subject: [PATCH 07/31] l2tp: don't use l2tp_tunnel_find() in l2tp_ip and l2tp_ip6 Using l2tp_tunnel_find() in l2tp_ip_recv() is wrong for two reasons: * It doesn't take a reference on the returned tunnel, which makes the call racy wrt. concurrent tunnel deletion. * The lookup is only based on the tunnel identifier, so it can return a tunnel that doesn't match the packet's addresses or protocol. For example, a packet sent to an L2TPv3 over IPv6 tunnel can be delivered to an L2TPv2 over UDPv4 tunnel. This is worse than a simple cross-talk: when delivering the packet to an L2TP over UDP tunnel, the corresponding socket is UDP, where ->sk_backlog_rcv() is NULL. Calling sk_receive_skb() will then crash the kernel by trying to execute this callback. And l2tp_tunnel_find() isn't even needed here. __l2tp_ip_bind_lookup() properly checks the socket binding and connection settings. 
It was used as a fallback mechanism for finding tunnels that didn't have their data path registered yet. But it's not limited to this case and can be used to replace l2tp_tunnel_find() in the general case. Fix l2tp_ip6 in the same way. Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support") Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 24 +++++++++--------------- net/l2tp/l2tp_ip6.c | 24 +++++++++--------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4d322c1b7233..e4280b6568b4 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct iphdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -178,24 +179,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct iphdr *iph = (struct iphdr *) skb_network_header(skb); + iph = (struct iphdr *)skb_network_header(skb); - read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, - inet_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip_lock); - goto discard; - } - - sock_hold(sk); + read_lock_bh(&l2tp_ip_lock); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb), + tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip_lock); if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 88b397c30d86..8bcaa975b432 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) 
unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct ipv6hdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -192,24 +193,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct ipv6hdr *iph = ipv6_hdr(skb); + iph = ipv6_hdr(skb); - read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, - inet6_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip6_lock); - goto discard; - } - - sock_hold(sk); + read_lock_bh(&l2tp_ip6_lock); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip6_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip6_lock); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; From d09b9e60e06d431b008a878c4b1d48d6cce816ef Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Fri, 3 Nov 2017 17:46:55 -0700 Subject: [PATCH 08/31] tcp: fix DSACK-based undo on non-duplicate ACK Fixes DSACK-based undo when sender is in Open State and an ACK advances snd_una. Example scenario: - Sender goes into recovery and makes some spurious rtx. - It comes out of recovery and enters into open state. - It sends some more packets, let's say 4. - The receiver sends an ACK for the first two, but this ACK is lost. - The sender receives ack for first two, and DSACK for previous spurious rtx. Signed-off-by: Priyaranjan Jha Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Yousuk Seung Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a87a00641d3..b2fc7163bd40 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -115,7 +115,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) From 13c249a94f525fe4c757d28854049780b25605c4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 4 Nov 2017 12:33:47 +0000 Subject: [PATCH 09/31] net: mvpp2: Prevent userspace from changing TX affinities The mvpp2 driver can't cope at all with the TX affinities being changed from userspace, and spits an endless stream of [ 91.779920] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.779930] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780402] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780406] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780415] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing [ 91.780418] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing rendering the box completely useless (I've measured around 600k interrupts/s on an 8040 box) once irqbalance kicks in and starts doing its job. Obviously, the driver was never designed with this in mind. So let's work around the problem by preventing userspace from interacting with these interrupts altogether. Signed-off-by: Marc Zyngier Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index a37af5813f33..fcf9ba5eb8d1 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6747,6 +6747,9 @@ static int mvpp2_irqs_init(struct mvpp2_port *port) for (i = 0; i < port->nqvecs; i++) { struct mvpp2_queue_vector *qv = port->qvecs + i; + if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) + irq_set_status_flags(qv->irq, IRQ_NO_BALANCING); + err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv); if (err) goto err; @@ -6776,6 +6779,7 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) struct mvpp2_queue_vector *qv = port->qvecs + i; irq_set_affinity_hint(qv->irq, NULL); + irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING); free_irq(qv->irq, qv); } } From 39a4b86f0de4ce5024985a56fc39b16194b04313 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 4 Nov 2017 22:54:53 -0500 Subject: [PATCH 10/31] net/mlx5e/core/en_fs: fix pointer dereference after free in mlx5e_execute_l2_action hn is being kfree'd in mlx5e_del_l2_from_hash and then dereferenced by accessing hn->ai.addr Fix this by copying the MAC address into a local variable for its safe use in all possible execution paths within function mlx5e_execute_l2_action. Addresses-Coverity-ID: 1417789 Fixes: eeb66cdb6826 ("net/mlx5: Separate between E-Switch and MPFS") Signed-off-by: Gustavo A. R. Silva Acked-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 850cdc980ab5..4837045ffba3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -365,21 +365,24 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, struct mlx5e_l2_hash_node *hn) { u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; int l2_err = 0; + ether_addr_copy(mac_addr, hn->ai.addr); + switch (action) { case MLX5E_ACTION_ADD: mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); - if (!is_multicast_ether_addr(hn->ai.addr)) { - l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr); hn->mpfs = !l2_err; } hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: - if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs) - l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr); + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr); mlx5e_del_l2_flow_rule(priv, &hn->ai); mlx5e_del_l2_from_hash(hn); break; @@ -387,7 +390,7 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, if (l2_err) netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", - action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err); + action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err); } static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) From b5f862180d7011d9575d0499fa37f0f25b423b12 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 6 Nov 2017 09:01:57 +0800 Subject: [PATCH 11/31] bonding: discard lowest hash bit for 802.3ad layer3+4 After commit 07f4c90062f8 ("tcp/dccp: try to not exhaust ip_local_port_range in connect()"), we will try to use even ports for connect(). 
Then if an application (seen clearly with iperf) opens multiple streams to the same destination IP and port, each stream will be given an even source port. So the bonding driver's simple xmit_hash_policy based on layer3+4 addressing will always hash all these streams to the same interface. And the total throughput will be limited to a single slave. Changing the tcp code would impact the whole tcp behavior, only for the sake of bonding usage. Paolo Abeni suggested fixing this by changing the bonding code only, which should be more reasonable and have less impact. Fix this by discarding the lowest hash bit because it contains little entropy. After the fix we can re-balance between slaves. Signed-off-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c99dc59d729b..76e8054bfc4e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3253,7 +3253,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) hash ^= (hash >> 16); hash ^= (hash >> 8); - return hash; + return hash >> 1; } /*-------------------------- Device entry points ----------------------------*/ From 2cb80187ba065d7decad7c6614e35e07aec8a974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 6 Nov 2017 15:37:22 +0100 Subject: [PATCH 12/31] net: cdc_ether: fix divide by 0 on bad descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting dev->hard_mtu to 0 will cause a divide error in usbnet_probe. Protect against devices with bogus CDC Ethernet functional descriptors by ignoring a zero wMaxSegmentSize. Signed-off-by: Bjørn Mork Acked-by: Oliver Neukum Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 3e7a3ac3a362..05dca3e5c93d 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -230,7 +230,7 @@ skip: goto bad_desc; } - if (header.usb_cdc_ether_desc) { + if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize); /* because of Zaurus, we may be ignoring the host * side link address we were given. From 7fd078337201cf7468f53c3d9ef81ff78cb6df3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 6 Nov 2017 15:32:18 +0100 Subject: [PATCH 13/31] net: qmi_wwan: fix divide by 0 on bad descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A CDC Ethernet functional descriptor with wMaxSegmentSize = 0 will cause a divide error in usbnet_probe: divide error: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc8-44453-g1fdc1a82c34f #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bef5c00 task.stack: ffff88006bf60000 RIP: 0010:usbnet_update_max_qlen+0x24d/0x390 drivers/net/usb/usbnet.c:355 RSP: 0018:ffff88006bf67508 EFLAGS: 00010246 RAX: 00000000000163c8 RBX: ffff8800621fce40 RCX: ffff8800621fcf34 RDX: 0000000000000000 RSI: ffffffff837ecb7a RDI: ffff8800621fcf34 RBP: ffff88006bf67520 R08: ffff88006bef5c00 R09: ffffed000c43f881 R10: ffffed000c43f880 R11: ffff8800621fc406 R12: 0000000000000003 R13: ffffffff85c71de0 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe9c0d6dac CR3: 00000000614f4000 CR4: 00000000000006f0 Call Trace: usbnet_probe+0x18b5/0x2790 drivers/net/usb/usbnet.c:1783 
qmi_wwan_probe+0x133/0x220 drivers/net/usb/qmi_wwan.c:1338 usb_probe_interface+0x324/0x940 drivers/usb/core/driver.c:361 really_probe drivers/base/dd.c:413 driver_probe_device+0x522/0x740 drivers/base/dd.c:557 Fix by simply ignoring the bogus descriptor, as it is optional for QMI devices anyway. Fixes: 423ce8caab7e ("net: usb: qmi_wwan: New driver for Huawei QMI based WWAN devices") Reported-by: Andrey Konovalov Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8c3733608271..a4f229edcceb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -681,7 +681,7 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) } /* errors aren't fatal - we can live with the dynamic address */ - if (cdc_ether) { + if (cdc_ether && cdc_ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(cdc_ether->wMaxSegmentSize); usbnet_get_ethernet_addr(dev, cdc_ether->iMACAddress); } From b7e732fa3171318418524b776b841b4024933b2b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 6 Nov 2017 20:50:35 -0800 Subject: [PATCH 14/31] qrtr: Move to postcore_initcall Registering qrtr with module_init makes the ability of typical platform code to create AF_QIPCRTR socket during probe a matter of link order luck. Moving qrtr to postcore_initcall() avoids this. Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c2f5c13550c0..78418f38464a 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1085,7 +1085,7 @@ static int __init qrtr_proto_init(void) return 0; } -module_init(qrtr_proto_init); +postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { From 055db6957e4735b16cd2fa94a5bbfb754c9b8023 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Tue, 7 Nov 2017 19:50:07 +0900 Subject: [PATCH 15/31] bonding: fix slave stuck in BOND_LINK_FAIL state The bonding miimon logic has a flaw, in that a failure of the rtnl_trylock can cause a slave to become permanently stuck in BOND_LINK_FAIL state. The sequence of events to cause this is as follows: 1) bond_miimon_inspect finds that a slave's link is down, and so calls bond_propose_link_state, setting slave->link_new_state to BOND_LINK_FAIL, then sets slave->new_link to BOND_LINK_DOWN and returns non-zero. 2) In bond_mii_monitor, the rtnl_trylock fails, and the timer is rescheduled. No change is committed. 3) bond_miimon_inspect is called again, but this time the slave from step 1 has recovered. slave->new_link is reset to NOCHANGE, and, as slave->link was never changed, the switch enters the BOND_LINK_UP case, and does nothing. The pending BOND_LINK_FAIL state from step 1 remains pending, as link_new_state is not reset. 4) The state from step 3 persists until another slave changes link state and causes bond_miimon_inspect to return non-zero. At this point, the BOND_LINK_FAIL state change on the slave from steps 1-3 is committed, and the slave will remain stuck in BOND_LINK_FAIL state even though it is actually link up. The remedy for this is to initialize link_new_state on each entry to bond_miimon_inspect, as is already done with new_link. 
Fixes: fb9eb899a6dc ("bonding: handle link transition from FAIL to UP correctly") Reported-by: Alex Sidorenko Reviewed-by: Jarod Wilson Signed-off-by: Jay Vosburgh Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 76e8054bfc4e..b2db581131b2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2042,6 +2042,7 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state = slave->link; link_state = bond_check_dev_link(bond, slave->dev, 0); From 0de0add10e587effa880c741c9413c874f16be91 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 7 Nov 2017 13:47:56 +0100 Subject: [PATCH 16/31] qmi_wwan: Add missing skb_reset_mac_header-call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we receive a packet on a QMI device in raw IP mode, we should call skb_reset_mac_header() to ensure that skb->mac_header contains a valid offset in the packet. While it shouldn't really matter, the packets have no MAC header and the interface is configured as-such, it seems certain parts of the network stack expects a "good" value in skb->mac_header. 
Without the skb_reset_mac_header() call added in this patch, for example shaping traffic (using tc) triggers the following oops on the first received packet: [ 303.642957] skbuff: skb_under_panic: text:8f137918 len:177 put:67 head:8e4b0f00 data:8e4b0eff tail:0x8e4b0fb0 end:0x8e4b1520 dev:wwan0 [ 303.655045] Kernel bug detected[#1]: [ 303.658622] CPU: 1 PID: 1002 Comm: logd Not tainted 4.9.58 #0 [ 303.664339] task: 8fdf05e0 task.stack: 8f15c000 [ 303.668844] $ 0 : 00000000 00000001 0000007a 00000000 [ 303.674062] $ 4 : 8149a2fc 8149a2fc 8149ce20 00000000 [ 303.679284] $ 8 : 00000030 3878303a 31623465 20303235 [ 303.684510] $12 : ded731e3 2626a277 00000000 03bd0000 [ 303.689747] $16 : 8ef62b40 00000043 8f137918 804db5fc [ 303.694978] $20 : 00000001 00000004 8fc13800 00000003 [ 303.700215] $24 : 00000001 8024ab10 [ 303.705442] $28 : 8f15c000 8fc19cf0 00000043 802cc920 [ 303.710664] Hi : 00000000 [ 303.713533] Lo : 74e58000 [ 303.716436] epc : 802cc920 skb_panic+0x58/0x5c [ 303.721046] ra : 802cc920 skb_panic+0x58/0x5c [ 303.725639] Status: 11007c03 KERNEL EXL IE [ 303.729823] Cause : 50800024 (ExcCode 09) [ 303.733817] PrId : 0001992f (MIPS 1004Kc) [ 303.737892] Modules linked in: rt2800pci rt2800mmio rt2800lib qcserial ppp_async option usb_wwan rt2x00pci rt2x00mmio rt2x00lib rndis_host qmi_wwan ppp_generic nf_nat_pptp nf_conntrack_pptp nf_conntrack_ipv6 mt76x2i Process logd (pid: 1002, threadinfo=8f15c000, task=8fdf05e0, tls=77b3eee4) [ 303.962509] Stack : 00000000 80408990 8f137918 000000b1 00000043 8e4b0f00 8e4b0eff 8e4b0fb0 [ 303.970871] 8e4b1520 8fec1800 00000043 802cd2a4 6e000045 00000043 00000000 8ef62000 [ 303.979219] 8eef5d00 8ef62b40 8fea7300 8f137918 00000000 00000000 0002bb01 793e5664 [ 303.987568] 8ef08884 00000001 8fea7300 00000002 8fc19e80 8eef5d00 00000006 00000003 [ 303.995934] 00000000 8030ba90 00000003 77ab3fd0 8149dc80 8004d1bc 8f15c000 8f383700 [ 304.004324] ... 
[ 304.006767] Call Trace: [ 304.009241] [<802cc920>] skb_panic+0x58/0x5c [ 304.013504] [<802cd2a4>] skb_push+0x78/0x90 [ 304.017783] [<8f137918>] 0x8f137918 [ 304.021269] Code: 00602825 0c02a3b4 24842888 <000c000d> 8c870060 8c8200a0 0007382b 00070336 8c88005c [ 304.031034] [ 304.032805] ---[ end trace b778c482b3f0bda9 ]--- [ 304.041384] Kernel panic - not syncing: Fatal exception in interrupt [ 304.051975] Rebooting in 3 seconds.. While the oops is for a 4.9-kernel, I was able to trigger the same oops with net-next as of yesterday. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a4f229edcceb..8d4a6f7cba61 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -499,6 +499,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } if (rawip) { + skb_reset_mac_header(skb); skb->dev = dev->net; /* normally set by eth_type_trans */ skb->protocol = proto; return 1; From 1a8e6b48fbf534028ce4031d0d035e7e72779cef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 9 Nov 2017 09:21:44 +0900 Subject: [PATCH 17/31] Revert "net: usb: asix: fill null-ptr-deref in asix_suspend" This reverts commit baedf68a068ca29624f241426843635920f16e1d. There is an updated version of this fix which covers the problem more thoroughly. Signed-off-by: David S. 
Miller --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 743416be84f3..b2ff88e69a81 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv && priv->suspend) + if (priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); From 8f5624629105589bcc23d0e51cc01bd8103d09a5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 6 Nov 2017 13:26:46 +0100 Subject: [PATCH 18/31] net: usb: asix: fix null-ptr-deref in asix_suspend When asix_suspend() is called dev->driver_priv might not have been assigned a value, so we need to check that it's not NULL. A similar issue is present in asix_resume(); this patch fixes it as well. Found by syzkaller. 
kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bb36300 task.stack: ffff88006bba8000 RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629 RSP: 0018:ffff88006bbae718 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644 RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008 RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40 R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80 FS: 0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0 Call Trace: usb_suspend_interface drivers/usb/core/driver.c:1209 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: 8d 
7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00 00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718 ---[ end trace dfc4f5649284342c ]--- Signed-off-by: Andrey Konovalov Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b2ff88e69a81..3d4f7959dabb 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->suspend) + if (priv && priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); @@ -678,7 +678,7 @@ static int asix_resume(struct usb_interface *intf) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->resume) + if (priv && priv->resume) priv->resume(dev); return usbnet_resume(intf); From c7e460ce55724d4e4e22d3126e5c47273819c53a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:18 -0800 Subject: [PATCH 19/31] Revert "net_sched: hold netns refcnt for each action" This reverts commit ceffcc5e254b450e6159f173e4538215cebf1b59. If we hold that refcnt, the netns can never be destroyed until all actions are destroyed by the user. This breaks our netns design, in which we expect all actions to be destroyed when we destroy the whole netns. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/net/act_api.h | 4 +--- net/sched/act_api.c | 2 -- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- 18 files changed, 18 insertions(+), 22 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 1e6df0eb058f..a10a3b1813f3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -14,7 +14,6 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; - struct net *net; }; struct tc_action_ops; @@ -106,7 +105,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, struct net *net) + const struct tc_action_ops *ops) { int err = 0; @@ -114,7 +113,6 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ca2ff0b3123f..8f2c63514956 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -78,7 +78,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) spin_lock_bh(&idrinfo->lock); idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); - put_net(idrinfo->net); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } @@ -337,7 +336,6 @@ err3: p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); - get_net(idrinfo->net); *a = p; return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9bce8cc84cbb..c0c707eb2c96 100644 --- a/net/sched/act_bpf.c +++ 
b/net/sched/act_bpf.c @@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, net); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 34e52d01a5dd..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, net); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 35171df2ebef..1c40caadcff9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, net); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ef7f7f39d26d..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, net); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index f65e4b5058e0..8ccd35825b6b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -818,7 +818,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, net); + 
return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index dbdf3b2470d5..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, net); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, net); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 84759cfd5a33..416627c66f08 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -343,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, net); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7eeaaf9217b6..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, net); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b3d82c334a5f..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - 
return tc_action_net_init(tn, &act_pedit_ops, net); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 9ec42b26e4b9..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, net); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index e69a1e3a39bf..8b5abcd2f32f 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, net); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index a8d0ea95f894..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, net); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fbac62472e09..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, net); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff 
--git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 8e12d8897d2f..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, net); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index c33faa373cf2..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, net); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 115fc33cc6d8..16eb067a8d8f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -269,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, net); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) From e4b95c41df36befcfd117210900cd790bc2cd048 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:19 -0800 Subject: [PATCH 20/31] net_sched: introduce tcf_exts_get_net() and tcf_exts_put_net() Instead of holding netns refcnt in tc actions, we can minimize the holding time by saving it in struct tcf_exts instead. This means we can just hold netns refcnt right before call_rcu() and release it after tcf_exts_destroy() is done. 
However, because on netns cleanup path we call tcf_proto_destroy() too, obviously we can not hold netns for a zero refcnt, in this case we have to do cleanup synchronously. It is fine for RCU too, the caller cleanup_net() already waits for a grace period. For other cases, refcnt is non-zero and we can safely grab it as normal and release it after we are done. This patch provides two new API for each filter to use: tcf_exts_get_net() and tcf_exts_put_net(). And all filters now can use the following pattern: void __destroy_filter() { tcf_exts_destroy(); tcf_exts_put_net(); // <== release netns refcnt kfree(); } void some_work() { rtnl_lock(); __destroy_filter(); rtnl_unlock(); } void some_rcu_callback() { tcf_queue_work(some_work); } if (tcf_exts_get_net()) // <== hold netns refcnt call_rcu(some_rcu_callback); else __destroy_filter(); Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 24 ++++++++++++++++++++++++ net/sched/cls_api.c | 1 + 2 files changed, 25 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 70ca2437740e..8826747ef83e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -94,6 +94,7 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; + struct net *net; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -107,6 +108,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + exts->net = NULL; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -117,6 +119,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } +/* Return false if the netns is being destroyed in cleanup_net(). 
Callers + * need to do cleanup synchronously in this case, otherwise may race with + * tc_action_net_exit(). Return true for other cases. + */ +static inline bool tcf_exts_get_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + exts->net = maybe_get_net(exts->net); + return exts->net != NULL; +#else + return true; +#endif +} + +static inline void tcf_exts_put_net(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + if (exts->net) + put_net(exts->net); +#endif +} + static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b2d310745487..ecbb019efcbd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -927,6 +927,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[i++] = act; exts->nr_actions = i; } + exts->net = net; } #else if ((exts->action && tb[exts->action]) || From 0b2a59894b7657fab46b50f176bd772aa495044f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:20 -0800 Subject: [PATCH 21/31] cls_basic: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_basic.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f177649a2419..e43c56d5b96a 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -85,16 +85,21 @@ static int basic_init(struct tcf_proto *tp) return 0; } +static void __basic_delete_filter(struct basic_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void basic_delete_filter_work(struct work_struct *work) { struct basic_filter *f = container_of(work, struct basic_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(&f->ematches); + __basic_delete_filter(f); rtnl_unlock(); - - kfree(f); } static void basic_delete_filter(struct rcu_head *head) @@ -113,7 +118,10 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, basic_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, basic_delete_filter); + else + __basic_delete_filter(f); } kfree_rcu(head, rcu); } @@ -125,6 +133,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -219,6 +228,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (fold) { list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); From aae2c35ec89252639a97769fa72dbbf8f1cc3107 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:21 -0800 Subject: [PATCH 22/31] cls_bpf: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() 
and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 037a3ae86829..990eb4d91d54 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -249,6 +249,7 @@ static int cls_bpf_init(struct tcf_proto *tp) static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); @@ -282,7 +283,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + if (tcf_exts_get_net(&prog->exts)) + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + else + __cls_bpf_delete_prog(prog); } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) @@ -516,6 +520,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); + tcf_exts_get_net(&oldprog->exts); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); From ed1481681414e4d4264ad46864d5c1da5ff6ccb1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:22 -0800 Subject: [PATCH 23/31] cls_cgroup: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. 
Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index a97e069bee89..309d5899265f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void __cls_cgroup_destroy(struct cls_cgroup_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(work, struct cls_cgroup_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + __cls_cgroup_destroy(head); rtnl_unlock(); } @@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; rcu_assign_pointer(tp->root, new); - if (head) + if (head) { + tcf_exts_get_net(&head->exts); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + } return 0; errout: tcf_exts_destroy(&new->exts); @@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). 
*/ - if (head) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + if (head) { + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + else + __cls_cgroup_destroy(head); + } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) From 22f7cec93f0af86c4b66bf34a977da9d7cef076e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:23 -0800 Subject: [PATCH 24/31] cls_flow: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 67f3a2af6aab..85f765cff697 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; +static void __flow_destroy_filter(struct flow_filter *f) +{ + del_timer_sync(&f->perturb_timer); + tcf_exts_destroy(&f->exts); + tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void flow_destroy_filter_work(struct work_struct *work) { struct flow_filter *f = container_of(work, struct flow_filter, work); rtnl_lock(); - del_timer_sync(&f->perturb_timer); - tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(&f->ematches); - kfree(f); + __flow_destroy_filter(f); rtnl_unlock(); } @@ -552,8 +558,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, *arg = fnew; - if (fold) + if (fold) { + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, flow_destroy_filter); + } return 0; err2: @@ -570,6 +578,7 @@ static int 
flow_delete(struct tcf_proto *tp, void *arg, bool *last) struct flow_filter *f = arg; list_del_rcu(&f->list); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, flow_destroy_filter); *last = list_empty(&head->filters); return 0; @@ -594,7 +603,10 @@ static void flow_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); - call_rcu(&f->rcu, flow_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, flow_destroy_filter); + else + __flow_destroy_filter(f); } kfree_rcu(head, rcu); } From 0dadc117ac8bc78d8144e862ac8ad23f342f9ea8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:24 -0800 Subject: [PATCH 25/31] cls_flower: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_flower.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5b5722c8b32c..7a838d1c1c00 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -218,13 +218,19 @@ static int fl_init(struct tcf_proto *tp) return 0; } +static void __fl_destroy_filter(struct cls_fl_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fl_destroy_filter_work(struct work_struct *work) { struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fl_destroy_filter(f); rtnl_unlock(); } @@ -318,7 +324,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fl_destroy_filter); + else + __fl_destroy_filter(f); } static void fl_destroy_sleepable(struct work_struct *work) @@ -988,6 +997,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { list_add_tail_rcu(&fnew->list, &head->filters); From d5f984f5af1d926bc9c7a7f90e7a1e1e313a8ba7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:25 -0800 Subject: [PATCH 26/31] cls_fw: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_fw.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 99183b8621ec..7f45e5ab8afc 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -122,13 +122,19 @@ static int fw_init(struct tcf_proto *tp) return 0; } +static void __fw_delete_filter(struct fw_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fw_delete_filter_work(struct work_struct *work) { struct fw_filter *f = container_of(work, struct fw_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fw_delete_filter(f); rtnl_unlock(); } @@ -154,7 +160,10 @@ static void fw_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fw_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fw_delete_filter); + else + __fw_delete_filter(f); } } kfree_rcu(head, rcu); @@ -179,6 +188,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); ret = 0; break; @@ -299,6 +309,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); *arg = fnew; From 57767e785321a427b8cdd282db2b8b33cd218ffa Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:26 -0800 Subject: [PATCH 27/31] cls_matchall: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_matchall.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index c33f711b9019..3684153cd8a9 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void __mall_destroy(struct cls_mall_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void mall_destroy_work(struct work_struct *work) { struct cls_mall_head *head = container_of(work, struct cls_mall_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - kfree(head); + __mall_destroy(head); rtnl_unlock(); } @@ -109,7 +115,10 @@ static void mall_destroy(struct tcf_proto *tp) if (tc_should_offload(dev, head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&head->rcu, mall_destroy_rcu); + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, mall_destroy_rcu); + else + __mall_destroy(head); } static void *mall_get(struct tcf_proto *tp, u32 handle) From 3fd51de5e3ba447624a08a8ba29f90d94f0fe909 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:27 -0800 Subject: [PATCH 28/31] cls_route: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_route.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 4b14ccd8b8f2..ac9a5b8825b9 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp) return 0; } +static void __route4_delete_filter(struct route4_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void route4_delete_filter_work(struct work_struct *work) { struct route4_filter *f = container_of(work, struct route4_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __route4_delete_filter(f); rtnl_unlock(); } @@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, route4_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, route4_delete_filter); + else + __route4_delete_filter(f); } } RCU_INIT_POINTER(head->table[h1], NULL); @@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) /* Delete it */ tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, route4_delete_filter); } return 0; From 96585063a27f0704dcf7a09f8b78edd6a8973965 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:28 -0800 Subject: [PATCH 29/31] cls_rsvp: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. 
Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index bdbc541787f8..cf325625c99d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } +static void __rsvp_delete_filter(struct rsvp_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void rsvp_delete_filter_work(struct work_struct *work) { struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __rsvp_delete_filter(f); rtnl_unlock(); } @@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, rsvp_delete_filter_rcu); + else + __rsvp_delete_filter(f); } static void rsvp_destroy(struct tcf_proto *tp) From f2b751053ee9314e82c178f6ca0fee7e160fac95 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:29 -0800 Subject: [PATCH 30/31] cls_tcindex: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_tcindex.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index beaa95e09c25..a76937ee0b2d 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -139,13 +139,19 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) +{ + tcf_exts_destroy(&r->exts); + tcf_exts_put_net(&r->exts); +} + static void tcindex_destroy_rexts_work(struct work_struct *work) { struct tcindex_filter_result *r; r = container_of(work, struct tcindex_filter_result, work); rtnl_lock(); - tcf_exts_destroy(&r->exts); + __tcindex_destroy_rexts(r); rtnl_unlock(); } @@ -158,14 +164,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head) tcf_queue_work(&r->work); } +static void __tcindex_destroy_fexts(struct tcindex_filter *f) +{ + tcf_exts_destroy(&f->result.exts); + tcf_exts_put_net(&f->result.exts); + kfree(f); +} + static void tcindex_destroy_fexts_work(struct work_struct *work) { struct tcindex_filter *f = container_of(work, struct tcindex_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->result.exts); - kfree(f); + __tcindex_destroy_fexts(f); rtnl_unlock(); } @@ -210,10 +222,17 @@ found: * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - if (f) - call_rcu(&f->rcu, tcindex_destroy_fexts); - else - call_rcu(&r->rcu, tcindex_destroy_rexts); + if (f) { + if (tcf_exts_get_net(&f->result.exts)) + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + __tcindex_destroy_fexts(f); + } else { + if (tcf_exts_get_net(&r->exts)) + call_rcu(&r->rcu, tcindex_destroy_rexts); + else + __tcindex_destroy_rexts(r); + } *last = false; return 0; From 35c55fc156d85a396a975fc17636f560fc02fd65 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:30 -0800 Subject: [PATCH 31/31] cls_u32: use tcf_exts_get_net() before call_rcu() Hold netns refcnt 
before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index dadd1b344497..b58eccb21f03 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); + tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF @@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); + tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -588,7 +590,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + if (tcf_exts_get_net(&n->exts)) + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + else + u32_destroy_key(n->tp, n, true); } } } @@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); + tcf_exts_get_net(&n->exts); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; }