From cb79a180f2e7eb51de5a4848652893197637bccb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 1 Nov 2017 20:30:49 +0100
Subject: [PATCH 01/31] xfrm: defer daddr pointer assignment after spi parsing

syzbot reports:
BUG: KASAN: use-after-free in __xfrm_state_lookup+0x695/0x6b0
Read of size 4 at addr ffff8801d434e538 by task syzkaller647520/2991
[..]
__xfrm_state_lookup+0x695/0x6b0 net/xfrm/xfrm_state.c:833
xfrm_state_lookup+0x8a/0x160 net/xfrm/xfrm_state.c:1592
xfrm_input+0x8e5/0x22f0 net/xfrm/xfrm_input.c:302

The use-after-free is the ipv4 destination address, which points
to an skb head area that has been reallocated:
  pskb_expand_head+0x36b/0x1210 net/core/skbuff.c:1494
  __pskb_pull_tail+0x14a/0x17c0 net/core/skbuff.c:1877
  pskb_may_pull include/linux/skbuff.h:2102 [inline]
  xfrm_parse_spi+0x3d3/0x4d0 net/xfrm/xfrm_input.c:170
  xfrm_input+0xce2/0x22f0 net/xfrm/xfrm_input.c:291

so the real bug is that xfrm_parse_spi() uses pskb_may_pull, but
for now do smaller workaround that makes xfrm_input fetch daddr
after spi parsing.

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 8ac9d32fb79d..1c6051cb7733 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -265,8 +265,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		goto lock;
 	}
 
-	daddr = (xfrm_address_t *)(skb_network_header(skb) +
-				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	family = XFRM_SPI_SKB_CB(skb)->family;
 
 	/* if tunnel is present override skb->mark value with tunnel i_key */
@@ -293,6 +291,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		goto drop;
 	}
 
+	daddr = (xfrm_address_t *)(skb_network_header(skb) +
+				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	do {
 		if (skb->sp->len == XFRM_MAX_DEPTH) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);

From cf37966751747727629fe51fd4a1d4edd8457c60 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 2 Nov 2017 16:46:01 +0100
Subject: [PATCH 02/31] xfrm: do unconditional template resolution before pcpu
 cache check

Stephen Smalley says:
 Since 4.14-rc1, the selinux-testsuite has been encountering sporadic
 failures during testing of labeled IPSEC. git bisect pointed to
 commit ec30d ("xfrm: add xdst pcpu cache").
 The xdst pcpu cache is only checking that the policies are the same,
 but does not validate that the policy, state, and flow match with respect
 to security context labeling.
 As a result, the wrong SA could be used and the receiver could end up
 performing permission checking and providing SO_PEERSEC or SCM_SECURITY
 values for the wrong security context.

This fix makes it so that we always do the template resolution, and
then checks that the found states match those in the pcpu bundle.

This has the disadvantage of doing a bit more work (lookup in state hash
table) if we can reuse the xdst entry (we only avoid xdst alloc/free)
but we don't add a lot of extra work in case we can't reuse.

xfrm_pol_dead() check is removed, reasoning is that
xfrm_tmpl_resolve does all needed checks.

Cc: Paul Moore <paul@paul-moore.com>
Fixes: ec30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache")
Reported-by: Stephen Smalley <sds@tycho.nsa.gov>
Tested-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 50 +++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8cafb3c0a4ac..a2e531bf4f97 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1787,19 +1787,23 @@ void xfrm_policy_cache_flush(void)
 	put_online_cpus();
 }
 
-static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
+				struct xfrm_state * const xfrm[],
+				int num)
 {
-	unsigned int num_pols = xdst->num_pols;
-	unsigned int pol_dead = 0, i;
+	const struct dst_entry *dst = &xdst->u.dst;
+	int i;
 
-	for (i = 0; i < num_pols; i++)
-		pol_dead |= xdst->pols[i]->walk.dead;
+	if (xdst->num_xfrms != num)
+		return false;
 
-	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-	if (pol_dead)
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+	for (i = 0; i < num; i++) {
+		if (!dst || dst->xfrm != xfrm[i])
+			return false;
+		dst = dst->child;
+	}
 
-	return pol_dead;
+	return xfrm_bundle_ok(xdst);
 }
 
 static struct xfrm_dst *
@@ -1813,19 +1817,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	struct dst_entry *dst;
 	int err;
 
-	xdst = this_cpu_read(xfrm_last_dst);
-	if (xdst &&
-	    xdst->u.dst.dev == dst_orig->dev &&
-	    xdst->num_pols == num_pols &&
-	    !xfrm_pol_dead(xdst) &&
-	    memcmp(xdst->pols, pols,
-		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-	    xfrm_bundle_ok(xdst)) {
-		dst_hold(&xdst->u.dst);
-		return xdst;
-	}
-
-	old = xdst;
 	/* Try to instantiate a bundle */
 	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
 	if (err <= 0) {
@@ -1834,6 +1825,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 		return ERR_PTR(err);
 	}
 
+	xdst = this_cpu_read(xfrm_last_dst);
+	if (xdst &&
+	    xdst->u.dst.dev == dst_orig->dev &&
+	    xdst->num_pols == num_pols &&
+	    memcmp(xdst->pols, pols,
+		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
+	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
+		dst_hold(&xdst->u.dst);
+		while (err > 0)
+			xfrm_state_put(xfrm[--err]);
+		return xdst;
+	}
+
+	old = xdst;
+
 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);

From c9f3f813d462c72dbe412cee6a5cbacf13c4ad5e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 2 Nov 2017 08:10:17 +0100
Subject: [PATCH 03/31] xfrm: Fix stack-out-of-bounds read in xfrm_state_find.

When we do tunnel or beet mode, we pass saddr and daddr from the
template to xfrm_state_find(), this is ok. On transport mode,
we pass the addresses from the flowi, assuming that the IP
addresses (and address family) don't change during transformation.
This assumption is wrong in the IPv4 mapped IPv6 case, packet
is IPv4 and template is IPv6. Fix this by using the addresses
from the template unconditionally.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a2e531bf4f97..6eb228a70131 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1361,36 +1361,29 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 	struct net *net = xp_net(policy);
 	int nx;
 	int i, error;
-	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
-	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
 	xfrm_address_t tmp;
 
 	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
 		struct xfrm_state *x;
-		xfrm_address_t *remote = daddr;
-		xfrm_address_t *local  = saddr;
+		xfrm_address_t *local;
+		xfrm_address_t *remote;
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
-		if (tmpl->mode == XFRM_MODE_TUNNEL ||
-		    tmpl->mode == XFRM_MODE_BEET) {
-			remote = &tmpl->id.daddr;
-			local = &tmpl->saddr;
-			if (xfrm_addr_any(local, tmpl->encap_family)) {
-				error = xfrm_get_saddr(net, fl->flowi_oif,
-						       &tmp, remote,
-						       tmpl->encap_family, 0);
-				if (error)
-					goto fail;
-				local = &tmp;
-			}
+		remote = &tmpl->id.daddr;
+		local = &tmpl->saddr;
+		if (xfrm_addr_any(local, tmpl->encap_family)) {
+			error = xfrm_get_saddr(net, fl->flowi_oif,
+					       &tmp, remote,
+					       tmpl->encap_family, 0);
+			if (error)
+				goto fail;
+			local = &tmp;
 		}
 
 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
 
 		if (x && x->km.state == XFRM_STATE_VALID) {
 			xfrm[nx++] = x;
-			daddr = remote;
-			saddr = local;
 			continue;
 		}
 		if (x) {

From 24de79e5008a928beb2c7ccc2396f15065613363 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Thu, 2 Nov 2017 19:26:22 +0530
Subject: [PATCH 04/31] cxgb4: update latest firmware version supported

Change t4fw_version.h to update latest firmware version
number to 1.16.63.0.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index f2d623a7aee0..123e2c1b65f5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -37,7 +37,7 @@
 
 #define T4FW_VERSION_MAJOR 0x01
 #define T4FW_VERSION_MINOR 0x10
-#define T4FW_VERSION_MICRO 0x2D
+#define T4FW_VERSION_MICRO 0x3F
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -46,7 +46,7 @@
 
 #define T5FW_VERSION_MAJOR 0x01
 #define T5FW_VERSION_MINOR 0x10
-#define T5FW_VERSION_MICRO 0x2D
+#define T5FW_VERSION_MICRO 0x3F
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -55,7 +55,7 @@
 
 #define T6FW_VERSION_MAJOR 0x01
 #define T6FW_VERSION_MINOR 0x10
-#define T6FW_VERSION_MICRO 0x2D
+#define T6FW_VERSION_MICRO 0x3F
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00

From 2b5ec1a5f9738ee7bf8f5ec0526e75e00362c48f Mon Sep 17 00:00:00 2001
From: Ye Yin <hustcat@gmail.com>
Date: Thu, 26 Oct 2017 16:57:05 +0800
Subject: [PATCH 05/31] netfilter/ipvs: clear ipvs_property flag when SKB net
 namespace changed

When run ipvs in two different network namespace at the same host, and one
ipvs transport network traffic to the other network namespace ipvs.
'ipvs_property' flag will make the second ipvs take no effect. So we should
clear 'ipvs_property' when SKB network namespace changed.

Fixes: 621e84d6f373 ("dev: introduce skb_scrub_packet()")
Signed-off-by: Ye Yin <hustcat@gmail.com>
Signed-off-by: Wei Zhou <chouryzhou@gmail.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 +++++++
 net/core/skbuff.c      | 1 +
 2 files changed, 8 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 72299ef00061..d448a4804aea 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3770,6 +3770,13 @@ static inline void nf_reset_trace(struct sk_buff *skb)
 #endif
 }
 
+static inline void ipvs_reset(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IP_VS)
+	skb->ipvs_property = 0;
+#endif
+}
+
 /* Note: This doesn't put any conntrack and bridge info in dst. */
 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
 			     bool copy)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 24656076906d..e140ba49b30a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4864,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	if (!xnet)
 		return;
 
+	ipvs_reset(skb);
 	skb_orphan(skb);
 	skb->mark = 0;
 }

From baedf68a068ca29624f241426843635920f16e1d Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Thu, 2 Nov 2017 21:26:59 +0100
Subject: [PATCH 06/31] net: usb: asix: fill null-ptr-deref in asix_suspend

When asix_suspend() is called dev->driver_priv might not have been
assigned a value, so we need to check that it's not NULL.

Found by syzkaller.

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
Modules linked in:
CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: usb_hub_wq hub_event
task: ffff88006bb36300 task.stack: ffff88006bba8000
RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629
RSP: 0018:ffff88006bbae718 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644
RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008
RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40
R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80
FS:  0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0
Call Trace:
 usb_suspend_interface drivers/usb/core/driver.c:1209
 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314
 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852
 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334
 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461
 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596
 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009
 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251
 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487
 hub_port_connect drivers/usb/core/hub.c:4903
 hub_port_connect_change drivers/usb/core/hub.c:5009
 port_event drivers/usb/core/hub.c:5115
 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195
 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119
 worker_thread+0x221/0x1850 kernel/workqueue.c:2253
 kthread+0x3a1/0x470 kernel/kthread.c:231
 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
Code: 8d 7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00
00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80>
3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c
RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718
---[ end trace dfc4f5649284342c ]---

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index b2ff88e69a81..743416be84f3 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message)
 	struct usbnet *dev = usb_get_intfdata(intf);
 	struct asix_common_private *priv = dev->driver_priv;
 
-	if (priv->suspend)
+	if (priv && priv->suspend)
 		priv->suspend(dev);
 
 	return usbnet_suspend(intf, message);

From 8f7dc9ae4a7aece9fbc3e6637bdfa38b36bcdf09 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 3 Nov 2017 16:49:00 +0100
Subject: [PATCH 07/31] l2tp: don't use l2tp_tunnel_find() in l2tp_ip and
 l2tp_ip6

Using l2tp_tunnel_find() in l2tp_ip_recv() is wrong for two reasons:

  * It doesn't take a reference on the returned tunnel, which makes the
    call racy wrt. concurrent tunnel deletion.

  * The lookup is only based on the tunnel identifier, so it can return
    a tunnel that doesn't match the packet's addresses or protocol.

For example, a packet sent to an L2TPv3 over IPv6 tunnel can be
delivered to an L2TPv2 over UDPv4 tunnel. This is worse than a simple
cross-talk: when delivering the packet to an L2TP over UDP tunnel, the
corresponding socket is UDP, where ->sk_backlog_rcv() is NULL. Calling
sk_receive_skb() will then crash the kernel by trying to execute this
callback.

And l2tp_tunnel_find() isn't even needed here. __l2tp_ip_bind_lookup()
properly checks the socket binding and connection settings. It was used
as a fallback mechanism for finding tunnels that didn't have their data
path registered yet. But it's not limited to this case and can be used
to replace l2tp_tunnel_find() in the general case.

Fix l2tp_ip6 in the same way.

Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support")
Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 24 +++++++++---------------
 net/l2tp/l2tp_ip6.c | 24 +++++++++---------------
 2 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4d322c1b7233..e4280b6568b4 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	unsigned char *ptr, *optr;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel = NULL;
+	struct iphdr *iph;
 	int length;
 
 	if (!pskb_may_pull(skb, 4))
@@ -178,24 +179,17 @@ pass_up:
 		goto discard;
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel) {
-		sk = tunnel->sock;
-		sock_hold(sk);
-	} else {
-		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
+	iph = (struct iphdr *)skb_network_header(skb);
 
-		read_lock_bh(&l2tp_ip_lock);
-		sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
-					   inet_iif(skb), tunnel_id);
-		if (!sk) {
-			read_unlock_bh(&l2tp_ip_lock);
-			goto discard;
-		}
-
-		sock_hold(sk);
+	read_lock_bh(&l2tp_ip_lock);
+	sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
+				   tunnel_id);
+	if (!sk) {
 		read_unlock_bh(&l2tp_ip_lock);
+		goto discard;
 	}
+	sock_hold(sk);
+	read_unlock_bh(&l2tp_ip_lock);
 
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_put;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 88b397c30d86..8bcaa975b432 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 	unsigned char *ptr, *optr;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel = NULL;
+	struct ipv6hdr *iph;
 	int length;
 
 	if (!pskb_may_pull(skb, 4))
@@ -192,24 +193,17 @@ pass_up:
 		goto discard;
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel) {
-		sk = tunnel->sock;
-		sock_hold(sk);
-	} else {
-		struct ipv6hdr *iph = ipv6_hdr(skb);
+	iph = ipv6_hdr(skb);
 
-		read_lock_bh(&l2tp_ip6_lock);
-		sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
-					    inet6_iif(skb), tunnel_id);
-		if (!sk) {
-			read_unlock_bh(&l2tp_ip6_lock);
-			goto discard;
-		}
-
-		sock_hold(sk);
+	read_lock_bh(&l2tp_ip6_lock);
+	sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+				    inet6_iif(skb), tunnel_id);
+	if (!sk) {
 		read_unlock_bh(&l2tp_ip6_lock);
+		goto discard;
 	}
+	sock_hold(sk);
+	read_unlock_bh(&l2tp_ip6_lock);
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_put;

From d09b9e60e06d431b008a878c4b1d48d6cce816ef Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Fri, 3 Nov 2017 17:46:55 -0700
Subject: [PATCH 08/31] tcp: fix DSACK-based undo on non-duplicate ACK

Fixes DSACK-based undo when sender is in Open State and
an ACK advances snd_una.

Example scenario:
- Sender goes into recovery and makes some spurious rtx.
- It comes out of recovery and enters into open state.
- It sends some more packets, let's say 4.
- The receiver sends an ACK for the first two, but this ACK is lost.
- The sender receives ack for first two, and DSACK for previous
  spurious rtx.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5a87a00641d3..b2fc7163bd40 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,7 +115,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)

From 13c249a94f525fe4c757d28854049780b25605c4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Sat, 4 Nov 2017 12:33:47 +0000
Subject: [PATCH 09/31] net: mvpp2: Prevent userspace from changing TX
 affinities

The mvpp2 driver can't cope at all with the TX affinities being
changed from userspace, and spit an endless stream of

[   91.779920] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing
[   91.779930] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing
[   91.780402] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing
[   91.780406] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing
[   91.780415] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing
[   91.780418] mvpp2 f4000000.ethernet eth2: wrong cpu on the end of Tx processing

rendering the box completely useless (I've measured around 600k
interrupts/s on a 8040 box) once irqbalance kicks in and start
doing its job.

Obviously, the driver was never designed with this in mind. So let's
work around the problem by preventing userspace from interacting
with these interrupts altogether.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a37af5813f33..fcf9ba5eb8d1 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -6747,6 +6747,9 @@ static int mvpp2_irqs_init(struct mvpp2_port *port)
 	for (i = 0; i < port->nqvecs; i++) {
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
+		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+			irq_set_status_flags(qv->irq, IRQ_NO_BALANCING);
+
 		err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
 		if (err)
 			goto err;
@@ -6776,6 +6779,7 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
 		irq_set_affinity_hint(qv->irq, NULL);
+		irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING);
 		free_irq(qv->irq, qv);
 	}
 }

From 39a4b86f0de4ce5024985a56fc39b16194b04313 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Sat, 4 Nov 2017 22:54:53 -0500
Subject: [PATCH 10/31] net/mlx5e/core/en_fs: fix pointer dereference after
 free in mlx5e_execute_l2_action

hn is being kfree'd in mlx5e_del_l2_from_hash and then dereferenced
by accessing hn->ai.addr

Fix this by copying the MAC address into a local variable for its safe use
in all possible execution paths within function mlx5e_execute_l2_action.

Addresses-Coverity-ID: 1417789
Fixes: eeb66cdb6826 ("net/mlx5: Separate between E-Switch and MPFS")
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 850cdc980ab5..4837045ffba3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -365,21 +365,24 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
 				    struct mlx5e_l2_hash_node *hn)
 {
 	u8 action = hn->action;
+	u8 mac_addr[ETH_ALEN];
 	int l2_err = 0;
 
+	ether_addr_copy(mac_addr, hn->ai.addr);
+
 	switch (action) {
 	case MLX5E_ACTION_ADD:
 		mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
-		if (!is_multicast_ether_addr(hn->ai.addr)) {
-			l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr);
+		if (!is_multicast_ether_addr(mac_addr)) {
+			l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr);
 			hn->mpfs = !l2_err;
 		}
 		hn->action = MLX5E_ACTION_NONE;
 		break;
 
 	case MLX5E_ACTION_DEL:
-		if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs)
-			l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr);
+		if (!is_multicast_ether_addr(mac_addr) && hn->mpfs)
+			l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr);
 		mlx5e_del_l2_flow_rule(priv, &hn->ai);
 		mlx5e_del_l2_from_hash(hn);
 		break;
@@ -387,7 +390,7 @@ static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
 
 	if (l2_err)
 		netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
-			    action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err);
+			    action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err);
 }
 
 static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)

From b5f862180d7011d9575d0499fa37f0f25b423b12 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 6 Nov 2017 09:01:57 +0800
Subject: [PATCH 11/31] bonding: discard lowest hash bit for 802.3ad layer3+4

After commit 07f4c90062f8 ("tcp/dccp: try to not exhaust ip_local_port_range
in connect()"), we will try to use even ports for connect(). Then if an
application (seen clearly with iperf) opens multiple streams to the same
destination IP and port, each stream will be given an even source port.

So the bonding driver's simple xmit_hash_policy based on layer3+4 addressing
will always hash all these streams to the same interface. And the total
throughput will limited to a single slave.

Change the tcp code will impact the whole tcp behavior, only for bonding
usage. Paolo Abeni suggested fix this by changing the bonding code only,
which should be more reasonable, and less impact.

Fix this by discarding the lowest hash bit because it contains little entropy.
After the fix we can re-balance between slaves.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c99dc59d729b..76e8054bfc4e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3253,7 +3253,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
 	hash ^= (hash >> 16);
 	hash ^= (hash >> 8);
 
-	return hash;
+	return hash >> 1;
 }
 
 /*-------------------------- Device entry points ----------------------------*/

From 2cb80187ba065d7decad7c6614e35e07aec8a974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Mon, 6 Nov 2017 15:37:22 +0100
Subject: [PATCH 12/31] net: cdc_ether: fix divide by 0 on bad descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setting dev->hard_mtu to 0 will cause a divide error in
usbnet_probe. Protect against devices with bogus CDC Ethernet
functional descriptors by ignoring a zero wMaxSegmentSize.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 3e7a3ac3a362..05dca3e5c93d 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -230,7 +230,7 @@ skip:
 			goto bad_desc;
 	}
 
-	if (header.usb_cdc_ether_desc) {
+	if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) {
 		dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize);
 		/* because of Zaurus, we may be ignoring the host
 		 * side link address we were given.

From 7fd078337201cf7468f53c3d9ef81ff78cb6df3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Mon, 6 Nov 2017 15:32:18 +0100
Subject: [PATCH 13/31] net: qmi_wwan: fix divide by 0 on bad descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CDC Ethernet functional descriptor with wMaxSegmentSize = 0 will
cause a divide error in usbnet_probe:

divide error: 0000 [#1] PREEMPT SMP KASAN
Modules linked in:
CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc8-44453-g1fdc1a82c34f #56
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: usb_hub_wq hub_event
task: ffff88006bef5c00 task.stack: ffff88006bf60000
RIP: 0010:usbnet_update_max_qlen+0x24d/0x390 drivers/net/usb/usbnet.c:355
RSP: 0018:ffff88006bf67508 EFLAGS: 00010246
RAX: 00000000000163c8 RBX: ffff8800621fce40 RCX: ffff8800621fcf34
RDX: 0000000000000000 RSI: ffffffff837ecb7a RDI: ffff8800621fcf34
RBP: ffff88006bf67520 R08: ffff88006bef5c00 R09: ffffed000c43f881
R10: ffffed000c43f880 R11: ffff8800621fc406 R12: 0000000000000003
R13: ffffffff85c71de0 R14: 0000000000000000 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffff88006ca00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffe9c0d6dac CR3: 00000000614f4000 CR4: 00000000000006f0
Call Trace:
 usbnet_probe+0x18b5/0x2790 drivers/net/usb/usbnet.c:1783
 qmi_wwan_probe+0x133/0x220 drivers/net/usb/qmi_wwan.c:1338
 usb_probe_interface+0x324/0x940 drivers/usb/core/driver.c:361
 really_probe drivers/base/dd.c:413
 driver_probe_device+0x522/0x740 drivers/base/dd.c:557

Fix by simply ignoring the bogus descriptor, as it is optional
for QMI devices anyway.

Fixes: 423ce8caab7e ("net: usb: qmi_wwan: New driver for Huawei QMI based WWAN devices")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8c3733608271..a4f229edcceb 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -681,7 +681,7 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf)
 	}
 
 	/* errors aren't fatal - we can live with the dynamic address */
-	if (cdc_ether) {
+	if (cdc_ether && cdc_ether->wMaxSegmentSize) {
 		dev->hard_mtu = le16_to_cpu(cdc_ether->wMaxSegmentSize);
 		usbnet_get_ethernet_addr(dev, cdc_ether->iMACAddress);
 	}

From b7e732fa3171318418524b776b841b4024933b2b Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 6 Nov 2017 20:50:35 -0800
Subject: [PATCH 14/31] qrtr: Move to postcore_initcall

Registering qrtr with module_init makes the ability of typical platform
code to create AF_QIPCRTR socket during probe a matter of link order
luck. Moving qrtr to postcore_initcall() avoids this.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c2f5c13550c0..78418f38464a 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1085,7 +1085,7 @@ static int __init qrtr_proto_init(void)
 
 	return 0;
 }
-module_init(qrtr_proto_init);
+postcore_initcall(qrtr_proto_init);
 
 static void __exit qrtr_proto_fini(void)
 {

From 055db6957e4735b16cd2fa94a5bbfb754c9b8023 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Tue, 7 Nov 2017 19:50:07 +0900
Subject: [PATCH 15/31] bonding: fix slave stuck in BOND_LINK_FAIL state

The bonding miimon logic has a flaw, in that a failure of the
rtnl_trylock can cause a slave to become permanently stuck in
BOND_LINK_FAIL state.

	The sequence of events to cause this is as follows:

	1) bond_miimon_inspect finds that a slave's link is down, and so
calls bond_propose_link_state, setting slave->new_link_state to
BOND_LINK_FAIL, then sets slave->new_link to BOND_LINK_DOWN and returns
non-zero.

	2) In bond_mii_monitor, the rtnl_trylock fails, and the timer is
rescheduled.  No change is committed.

	3) bond_miimon_inspect is called again, but this time the slave
from step 1 has recovered.  slave->new_link is reset to NOCHANGE, and, as
slave->link was never changed, the switch enters the BOND_LINK_UP case,
and does nothing.  The pending BOND_LINK_FAIL state from step 1 remains
pending, as new_link_state is not reset.

	4) The state from step 3 persists until another slave changes link
state and causes bond_miimon_inspect to return non-zero.  At this point,
the BOND_LINK_FAIL state change on the slave from steps 1-3 is committed,
and the slave will remain stuck in BOND_LINK_FAIL state even though it
is actually link up.

	The remedy for this is to initialize new_link_state on each entry
to bond_miimon_inspect, as is already done with new_link.

Fixes: fb9eb899a6dc ("bonding: handle link transition from FAIL to UP correctly")
Reported-by: Alex Sidorenko <alexandre.sidorenko@hpe.com>
Reviewed-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 76e8054bfc4e..b2db581131b2 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2042,6 +2042,7 @@ static int bond_miimon_inspect(struct bonding *bond)
 
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		slave->new_link = BOND_LINK_NOCHANGE;
+		slave->link_new_state = slave->link;
 
 		link_state = bond_check_dev_link(bond, slave->dev, 0);
 

From 0de0add10e587effa880c741c9413c874f16be91 Mon Sep 17 00:00:00 2001
From: Kristian Evensen <kristian.evensen@gmail.com>
Date: Tue, 7 Nov 2017 13:47:56 +0100
Subject: [PATCH 16/31] qmi_wwan: Add missing skb_reset_mac_header-call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we receive a packet on a QMI device in raw IP mode, we should call
skb_reset_mac_header() to ensure that skb->mac_header contains a valid
offset in the packet. While it shouldn't really matter, the packets have
no MAC header and the interface is configured as-such, it seems certain
parts of the network stack expects a "good" value in skb->mac_header.

Without the skb_reset_mac_header() call added in this patch, for example
shaping traffic (using tc) triggers the following oops on the first
received packet:

[  303.642957] skbuff: skb_under_panic: text:8f137918 len:177 put:67 head:8e4b0f00 data:8e4b0eff tail:0x8e4b0fb0 end:0x8e4b1520 dev:wwan0
[  303.655045] Kernel bug detected[#1]:
[  303.658622] CPU: 1 PID: 1002 Comm: logd Not tainted 4.9.58 #0
[  303.664339] task: 8fdf05e0 task.stack: 8f15c000
[  303.668844] $ 0   : 00000000 00000001 0000007a 00000000
[  303.674062] $ 4   : 8149a2fc 8149a2fc 8149ce20 00000000
[  303.679284] $ 8   : 00000030 3878303a 31623465 20303235
[  303.684510] $12   : ded731e3 2626a277 00000000 03bd0000
[  303.689747] $16   : 8ef62b40 00000043 8f137918 804db5fc
[  303.694978] $20   : 00000001 00000004 8fc13800 00000003
[  303.700215] $24   : 00000001 8024ab10
[  303.705442] $28   : 8f15c000 8fc19cf0 00000043 802cc920
[  303.710664] Hi    : 00000000
[  303.713533] Lo    : 74e58000
[  303.716436] epc   : 802cc920 skb_panic+0x58/0x5c
[  303.721046] ra    : 802cc920 skb_panic+0x58/0x5c
[  303.725639] Status: 11007c03 KERNEL EXL IE
[  303.729823] Cause : 50800024 (ExcCode 09)
[  303.733817] PrId  : 0001992f (MIPS 1004Kc)
[  303.737892] Modules linked in: rt2800pci rt2800mmio rt2800lib qcserial ppp_async option usb_wwan rt2x00pci rt2x00mmio rt2x00lib rndis_host qmi_wwan ppp_generic nf_nat_pptp nf_conntrack_pptp nf_conntrack_ipv6 mt76x2i
Process logd (pid: 1002, threadinfo=8f15c000, task=8fdf05e0, tls=77b3eee4)
[  303.962509] Stack : 00000000 80408990 8f137918 000000b1 00000043 8e4b0f00 8e4b0eff 8e4b0fb0
[  303.970871]         8e4b1520 8fec1800 00000043 802cd2a4 6e000045 00000043 00000000 8ef62000
[  303.979219]         8eef5d00 8ef62b40 8fea7300 8f137918 00000000 00000000 0002bb01 793e5664
[  303.987568]         8ef08884 00000001 8fea7300 00000002 8fc19e80 8eef5d00 00000006 00000003
[  303.995934]         00000000 8030ba90 00000003 77ab3fd0 8149dc80 8004d1bc 8f15c000 8f383700
[  304.004324]         ...
[  304.006767] Call Trace:
[  304.009241] [<802cc920>] skb_panic+0x58/0x5c
[  304.013504] [<802cd2a4>] skb_push+0x78/0x90
[  304.017783] [<8f137918>] 0x8f137918
[  304.021269] Code: 00602825  0c02a3b4  24842888 <000c000d> 8c870060  8c8200a0  0007382b  00070336  8c88005c
[  304.031034]
[  304.032805] ---[ end trace b778c482b3f0bda9 ]---
[  304.041384] Kernel panic - not syncing: Fatal exception in interrupt
[  304.051975] Rebooting in 3 seconds..

While the oops is for a 4.9-kernel, I was able to trigger the same oops with
net-next as of yesterday.

Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode")
Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index a4f229edcceb..8d4a6f7cba61 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -499,6 +499,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		return 1;
 	}
 	if (rawip) {
+		skb_reset_mac_header(skb);
 		skb->dev = dev->net; /* normally set by eth_type_trans */
 		skb->protocol = proto;
 		return 1;

From 1a8e6b48fbf534028ce4031d0d035e7e72779cef Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 9 Nov 2017 09:21:44 +0900
Subject: [PATCH 17/31] Revert "net: usb: asix: fill null-ptr-deref in
 asix_suspend"

This reverts commit baedf68a068ca29624f241426843635920f16e1d.

There is an updated version of this fix which covers
the problem more thoroughly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 743416be84f3..b2ff88e69a81 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message)
 	struct usbnet *dev = usb_get_intfdata(intf);
 	struct asix_common_private *priv = dev->driver_priv;
 
-	if (priv && priv->suspend)
+	if (priv->suspend)
 		priv->suspend(dev);
 
 	return usbnet_suspend(intf, message);

From 8f5624629105589bcc23d0e51cc01bd8103d09a5 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Mon, 6 Nov 2017 13:26:46 +0100
Subject: [PATCH 18/31] net: usb: asix: fill null-ptr-deref in asix_suspend

When asix_suspend() is called dev->driver_priv might not have been
assigned a value, so we need to check that it's not NULL.

Similar issue is present in asix_resume(), this patch fixes it as well.

Found by syzkaller.

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
Modules linked in:
CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: usb_hub_wq hub_event
task: ffff88006bb36300 task.stack: ffff88006bba8000
RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629
RSP: 0018:ffff88006bbae718 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644
RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008
RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40
R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80
FS:  0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0
Call Trace:
 usb_suspend_interface drivers/usb/core/driver.c:1209
 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314
 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852
 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334
 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461
 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596
 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009
 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251
 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487
 hub_port_connect drivers/usb/core/hub.c:4903
 hub_port_connect_change drivers/usb/core/hub.c:5009
 port_event drivers/usb/core/hub.c:5115
 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195
 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119
 worker_thread+0x221/0x1850 kernel/workqueue.c:2253
 kthread+0x3a1/0x470 kernel/kthread.c:231
 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
Code: 8d 7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00
00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80>
3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c
RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718
---[ end trace dfc4f5649284342c ]---

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index b2ff88e69a81..3d4f7959dabb 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -626,7 +626,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message)
 	struct usbnet *dev = usb_get_intfdata(intf);
 	struct asix_common_private *priv = dev->driver_priv;
 
-	if (priv->suspend)
+	if (priv && priv->suspend)
 		priv->suspend(dev);
 
 	return usbnet_suspend(intf, message);
@@ -678,7 +678,7 @@ static int asix_resume(struct usb_interface *intf)
 	struct usbnet *dev = usb_get_intfdata(intf);
 	struct asix_common_private *priv = dev->driver_priv;
 
-	if (priv->resume)
+	if (priv && priv->resume)
 		priv->resume(dev);
 
 	return usbnet_resume(intf);

From c7e460ce55724d4e4e22d3126e5c47273819c53a Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:18 -0800
Subject: [PATCH 19/31] Revert "net_sched: hold netns refcnt for each action"

This reverts commit ceffcc5e254b450e6159f173e4538215cebf1b59.
If we hold that refcnt, the netns can never be destroyed until
all actions are destroyed by user, this breaks our netns design
which we expect all actions are destroyed when we destroy the
whole netns.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      | 4 +---
 net/sched/act_api.c        | 2 --
 net/sched/act_bpf.c        | 2 +-
 net/sched/act_connmark.c   | 2 +-
 net/sched/act_csum.c       | 2 +-
 net/sched/act_gact.c       | 2 +-
 net/sched/act_ife.c        | 2 +-
 net/sched/act_ipt.c        | 4 ++--
 net/sched/act_mirred.c     | 2 +-
 net/sched/act_nat.c        | 2 +-
 net/sched/act_pedit.c      | 2 +-
 net/sched/act_police.c     | 2 +-
 net/sched/act_sample.c     | 2 +-
 net/sched/act_simple.c     | 2 +-
 net/sched/act_skbedit.c    | 2 +-
 net/sched/act_skbmod.c     | 2 +-
 net/sched/act_tunnel_key.c | 2 +-
 net/sched/act_vlan.c       | 2 +-
 18 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 1e6df0eb058f..a10a3b1813f3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -14,7 +14,6 @@
 struct tcf_idrinfo {
 	spinlock_t	lock;
 	struct idr	action_idr;
-	struct net	*net;
 };
 
 struct tc_action_ops;
@@ -106,7 +105,7 @@ struct tc_action_net {
 
 static inline
 int tc_action_net_init(struct tc_action_net *tn,
-		       const struct tc_action_ops *ops, struct net *net)
+		       const struct tc_action_ops *ops)
 {
 	int err = 0;
 
@@ -114,7 +113,6 @@ int tc_action_net_init(struct tc_action_net *tn,
 	if (!tn->idrinfo)
 		return -ENOMEM;
 	tn->ops = ops;
-	tn->idrinfo->net = net;
 	spin_lock_init(&tn->idrinfo->lock);
 	idr_init(&tn->idrinfo->action_idr);
 	return err;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ca2ff0b3123f..8f2c63514956 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
 	spin_lock_bh(&idrinfo->lock);
 	idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
 	spin_unlock_bh(&idrinfo->lock);
-	put_net(idrinfo->net);
 	gen_kill_estimator(&p->tcfa_rate_est);
 	free_tcf(p);
 }
@@ -337,7 +336,6 @@ err3:
 	p->idrinfo = idrinfo;
 	p->ops = ops;
 	INIT_LIST_HEAD(&p->list);
-	get_net(idrinfo->net);
 	*a = p;
 	return 0;
 }
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9bce8cc84cbb..c0c707eb2c96 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tc_action_net_init(tn, &act_bpf_ops, net);
+	return tc_action_net_init(tn, &act_bpf_ops);
 }
 
 static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 34e52d01a5dd..10b7a8855a6c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tc_action_net_init(tn, &act_connmark_ops, net);
+	return tc_action_net_init(tn, &act_connmark_ops);
 }
 
 static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 35171df2ebef..1c40caadcff9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tc_action_net_init(tn, &act_csum_ops, net);
+	return tc_action_net_init(tn, &act_csum_ops);
 }
 
 static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ef7f7f39d26d..e29a48ef7fc3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tc_action_net_init(tn, &act_gact_ops, net);
+	return tc_action_net_init(tn, &act_gact_ops);
 }
 
 static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index f65e4b5058e0..8ccd35825b6b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -818,7 +818,7 @@ static __net_init int ife_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tc_action_net_init(tn, &act_ife_ops, net);
+	return tc_action_net_init(tn, &act_ife_ops);
 }
 
 static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index dbdf3b2470d5..d9e399a7e3d5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tc_action_net_init(tn, &act_ipt_ops, net);
+	return tc_action_net_init(tn, &act_ipt_ops);
 }
 
 static void __net_exit ipt_exit_net(struct net *net)
@@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tc_action_net_init(tn, &act_xt_ops, net);
+	return tc_action_net_init(tn, &act_xt_ops);
 }
 
 static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 84759cfd5a33..416627c66f08 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -343,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tc_action_net_init(tn, &act_mirred_ops, net);
+	return tc_action_net_init(tn, &act_mirred_ops);
 }
 
 static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7eeaaf9217b6..c365d01b99c8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tc_action_net_init(tn, &act_nat_ops, net);
+	return tc_action_net_init(tn, &act_nat_ops);
 }
 
 static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b3d82c334a5f..491fe5deb09e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tc_action_net_init(tn, &act_pedit_ops, net);
+	return tc_action_net_init(tn, &act_pedit_ops);
 }
 
 static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9ec42b26e4b9..3bb2ebf9e9ae 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tc_action_net_init(tn, &act_police_ops, net);
+	return tc_action_net_init(tn, &act_police_ops);
 }
 
 static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index e69a1e3a39bf..8b5abcd2f32f 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tc_action_net_init(tn, &act_sample_ops, net);
+	return tc_action_net_init(tn, &act_sample_ops);
 }
 
 static void __net_exit sample_exit_net(struct net *net)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index a8d0ea95f894..e7b57e5071a3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tc_action_net_init(tn, &act_simp_ops, net);
+	return tc_action_net_init(tn, &act_simp_ops);
 }
 
 static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fbac62472e09..59949d61f20d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tc_action_net_init(tn, &act_skbedit_ops, net);
+	return tc_action_net_init(tn, &act_skbedit_ops);
 }
 
 static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 8e12d8897d2f..b642ad3d39dd 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tc_action_net_init(tn, &act_skbmod_ops, net);
+	return tc_action_net_init(tn, &act_skbmod_ops);
 }
 
 static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index c33faa373cf2..30c96274c638 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tc_action_net_init(tn, &act_tunnel_key_ops, net);
+	return tc_action_net_init(tn, &act_tunnel_key_ops);
 }
 
 static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 115fc33cc6d8..16eb067a8d8f 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -269,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tc_action_net_init(tn, &act_vlan_ops, net);
+	return tc_action_net_init(tn, &act_vlan_ops);
 }
 
 static void __net_exit vlan_exit_net(struct net *net)

From e4b95c41df36befcfd117210900cd790bc2cd048 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:19 -0800
Subject: [PATCH 20/31] net_sched: introduce tcf_exts_get_net() and
 tcf_exts_put_net()

Instead of holding netns refcnt in tc actions, we can minimize
the holding time by saving it in struct tcf_exts instead. This
means we can just hold netns refcnt right before call_rcu() and
release it after tcf_exts_destroy() is done.

However, because on netns cleanup path we call tcf_proto_destroy()
too, obviously we can not hold netns for a zero refcnt, in this
case we have to do cleanup synchronously. It is fine for RCU too,
the caller cleanup_net() already waits for a grace period.

For other cases, refcnt is non-zero and we can safely grab it as
normal and release it after we are done.

This patch provides two new API for each filter to use:
tcf_exts_get_net() and tcf_exts_put_net(). And all filters now can
use the following pattern:

void __destroy_filter() {
  tcf_exts_destroy();
  tcf_exts_put_net();  // <== release netns refcnt
  kfree();
}
void some_work() {
  rtnl_lock();
  __destroy_filter();
  rtnl_unlock();
}
void some_rcu_callback() {
  tcf_queue_work(some_work);
}

if (tcf_exts_get_net())  // <== hold netns refcnt
  call_rcu(some_rcu_callback);
else
  __destroy_filter();

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 24 ++++++++++++++++++++++++
 net/sched/cls_api.c   |  1 +
 2 files changed, 25 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 70ca2437740e..8826747ef83e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -94,6 +94,7 @@ struct tcf_exts {
 	__u32	type; /* for backward compat(TCA_OLD_COMPAT) */
 	int nr_actions;
 	struct tc_action **actions;
+	struct net *net;
 #endif
 	/* Map to export classifier specific extension TLV types to the
 	 * generic extensions API. Unsupported extensions must be set to 0.
@@ -107,6 +108,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
 #ifdef CONFIG_NET_CLS_ACT
 	exts->type = 0;
 	exts->nr_actions = 0;
+	exts->net = NULL;
 	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
 				GFP_KERNEL);
 	if (!exts->actions)
@@ -117,6 +119,28 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
 	return 0;
 }
 
+/* Return false if the netns is being destroyed in cleanup_net(). Callers
+ * need to do cleanup synchronously in this case, otherwise may race with
+ * tc_action_net_exit(). Return true for other cases.
+ */
+static inline bool tcf_exts_get_net(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	exts->net = maybe_get_net(exts->net);
+	return exts->net != NULL;
+#else
+	return true;
+#endif
+}
+
+static inline void tcf_exts_put_net(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (exts->net)
+		put_net(exts->net);
+#endif
+}
+
 static inline void tcf_exts_to_list(const struct tcf_exts *exts,
 				    struct list_head *actions)
 {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b2d310745487..ecbb019efcbd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -927,6 +927,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 				exts->actions[i++] = act;
 			exts->nr_actions = i;
 		}
+		exts->net = net;
 	}
 #else
 	if ((exts->action && tb[exts->action]) ||

From 0b2a59894b7657fab46b50f176bd772aa495044f Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:20 -0800
Subject: [PATCH 21/31] cls_basic: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_basic.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f177649a2419..e43c56d5b96a 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -85,16 +85,21 @@ static int basic_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __basic_delete_filter(struct basic_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_em_tree_destroy(&f->ematches);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
 static void basic_delete_filter_work(struct work_struct *work)
 {
 	struct basic_filter *f = container_of(work, struct basic_filter, work);
 
 	rtnl_lock();
-	tcf_exts_destroy(&f->exts);
-	tcf_em_tree_destroy(&f->ematches);
+	__basic_delete_filter(f);
 	rtnl_unlock();
-
-	kfree(f);
 }
 
 static void basic_delete_filter(struct rcu_head *head)
@@ -113,7 +118,10 @@ static void basic_destroy(struct tcf_proto *tp)
 	list_for_each_entry_safe(f, n, &head->flist, link) {
 		list_del_rcu(&f->link);
 		tcf_unbind_filter(tp, &f->res);
-		call_rcu(&f->rcu, basic_delete_filter);
+		if (tcf_exts_get_net(&f->exts))
+			call_rcu(&f->rcu, basic_delete_filter);
+		else
+			__basic_delete_filter(f);
 	}
 	kfree_rcu(head, rcu);
 }
@@ -125,6 +133,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
 
 	list_del_rcu(&f->link);
 	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_get_net(&f->exts);
 	call_rcu(&f->rcu, basic_delete_filter);
 	*last = list_empty(&head->flist);
 	return 0;
@@ -219,6 +228,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (fold) {
 		list_replace_rcu(&fold->link, &fnew->link);
 		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, basic_delete_filter);
 	} else {
 		list_add_rcu(&fnew->link, &head->flist);

From aae2c35ec89252639a97769fa72dbbf8f1cc3107 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:21 -0800
Subject: [PATCH 22/31] cls_bpf: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_bpf.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 037a3ae86829..990eb4d91d54 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -249,6 +249,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
 static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
 {
 	tcf_exts_destroy(&prog->exts);
+	tcf_exts_put_net(&prog->exts);
 
 	if (cls_bpf_is_ebpf(prog))
 		bpf_prog_put(prog->filter);
@@ -282,7 +283,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
 	cls_bpf_stop_offload(tp, prog);
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
-	call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+	if (tcf_exts_get_net(&prog->exts))
+		call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+	else
+		__cls_bpf_delete_prog(prog);
 }
 
 static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
@@ -516,6 +520,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 	if (oldprog) {
 		list_replace_rcu(&oldprog->link, &prog->link);
 		tcf_unbind_filter(tp, &oldprog->res);
+		tcf_exts_get_net(&oldprog->exts);
 		call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
 	} else {
 		list_add_rcu(&prog->link, &head->plist);

From ed1481681414e4d4264ad46864d5c1da5ff6ccb1 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:22 -0800
Subject: [PATCH 23/31] cls_cgroup: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_cgroup.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index a97e069bee89..309d5899265f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
 };
 
+static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
+{
+	tcf_exts_destroy(&head->exts);
+	tcf_em_tree_destroy(&head->ematches);
+	tcf_exts_put_net(&head->exts);
+	kfree(head);
+}
+
 static void cls_cgroup_destroy_work(struct work_struct *work)
 {
 	struct cls_cgroup_head *head = container_of(work,
 						    struct cls_cgroup_head,
 						    work);
 	rtnl_lock();
-	tcf_exts_destroy(&head->exts);
-	tcf_em_tree_destroy(&head->ematches);
-	kfree(head);
+	__cls_cgroup_destroy(head);
 	rtnl_unlock();
 }
 
@@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 
 	rcu_assign_pointer(tp->root, new);
-	if (head)
+	if (head) {
+		tcf_exts_get_net(&head->exts);
 		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+	}
 	return 0;
 errout:
 	tcf_exts_destroy(&new->exts);
@@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 
 	/* Head can still be NULL due to cls_cgroup_init(). */
-	if (head)
-		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+	if (head) {
+		if (tcf_exts_get_net(&head->exts))
+			call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+		else
+			__cls_cgroup_destroy(head);
+	}
 }
 
 static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)

From 22f7cec93f0af86c4b66bf34a977da9d7cef076e Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:23 -0800
Subject: [PATCH 24/31] cls_flow: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flow.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 67f3a2af6aab..85f765cff697 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
 };
 
+static void __flow_destroy_filter(struct flow_filter *f)
+{
+	del_timer_sync(&f->perturb_timer);
+	tcf_exts_destroy(&f->exts);
+	tcf_em_tree_destroy(&f->ematches);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
 static void flow_destroy_filter_work(struct work_struct *work)
 {
 	struct flow_filter *f = container_of(work, struct flow_filter, work);
 
 	rtnl_lock();
-	del_timer_sync(&f->perturb_timer);
-	tcf_exts_destroy(&f->exts);
-	tcf_em_tree_destroy(&f->ematches);
-	kfree(f);
+	__flow_destroy_filter(f);
 	rtnl_unlock();
 }
 
@@ -552,8 +558,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 
 	*arg = fnew;
 
-	if (fold)
+	if (fold) {
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, flow_destroy_filter);
+	}
 	return 0;
 
 err2:
@@ -570,6 +578,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
 	struct flow_filter *f = arg;
 
 	list_del_rcu(&f->list);
+	tcf_exts_get_net(&f->exts);
 	call_rcu(&f->rcu, flow_destroy_filter);
 	*last = list_empty(&head->filters);
 	return 0;
@@ -594,7 +603,10 @@ static void flow_destroy(struct tcf_proto *tp)
 
 	list_for_each_entry_safe(f, next, &head->filters, list) {
 		list_del_rcu(&f->list);
-		call_rcu(&f->rcu, flow_destroy_filter);
+		if (tcf_exts_get_net(&f->exts))
+			call_rcu(&f->rcu, flow_destroy_filter);
+		else
+			__flow_destroy_filter(f);
 	}
 	kfree_rcu(head, rcu);
 }

From 0dadc117ac8bc78d8144e862ac8ad23f342f9ea8 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:24 -0800
Subject: [PATCH 25/31] cls_flower: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5b5722c8b32c..7a838d1c1c00 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -218,13 +218,19 @@ static int fl_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __fl_destroy_filter(struct cls_fl_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
 static void fl_destroy_filter_work(struct work_struct *work)
 {
 	struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
 
 	rtnl_lock();
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	__fl_destroy_filter(f);
 	rtnl_unlock();
 }
 
@@ -318,7 +324,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
 	if (!tc_skip_hw(f->flags))
 		fl_hw_destroy_filter(tp, f);
 	tcf_unbind_filter(tp, &f->res);
-	call_rcu(&f->rcu, fl_destroy_filter);
+	if (tcf_exts_get_net(&f->exts))
+		call_rcu(&f->rcu, fl_destroy_filter);
+	else
+		__fl_destroy_filter(f);
 }
 
 static void fl_destroy_sleepable(struct work_struct *work)
@@ -988,6 +997,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
 		list_replace_rcu(&fold->list, &fnew->list);
 		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, fl_destroy_filter);
 	} else {
 		list_add_tail_rcu(&fnew->list, &head->filters);

From d5f984f5af1d926bc9c7a7f90e7a1e1e313a8ba7 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:25 -0800
Subject: [PATCH 26/31] cls_fw: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_fw.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 99183b8621ec..7f45e5ab8afc 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -122,13 +122,19 @@ static int fw_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __fw_delete_filter(struct fw_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
 static void fw_delete_filter_work(struct work_struct *work)
 {
 	struct fw_filter *f = container_of(work, struct fw_filter, work);
 
 	rtnl_lock();
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	__fw_delete_filter(f);
 	rtnl_unlock();
 }
 
@@ -154,7 +160,10 @@ static void fw_destroy(struct tcf_proto *tp)
 			RCU_INIT_POINTER(head->ht[h],
 					 rtnl_dereference(f->next));
 			tcf_unbind_filter(tp, &f->res);
-			call_rcu(&f->rcu, fw_delete_filter);
+			if (tcf_exts_get_net(&f->exts))
+				call_rcu(&f->rcu, fw_delete_filter);
+			else
+				__fw_delete_filter(f);
 		}
 	}
 	kfree_rcu(head, rcu);
@@ -179,6 +188,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
 		if (pfp == f) {
 			RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
 			tcf_unbind_filter(tp, &f->res);
+			tcf_exts_get_net(&f->exts);
 			call_rcu(&f->rcu, fw_delete_filter);
 			ret = 0;
 			break;
@@ -299,6 +309,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 		RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
 		rcu_assign_pointer(*fp, fnew);
 		tcf_unbind_filter(tp, &f->res);
+		tcf_exts_get_net(&f->exts);
 		call_rcu(&f->rcu, fw_delete_filter);
 
 		*arg = fnew;

From 57767e785321a427b8cdd282db2b8b33cd218ffa Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:26 -0800
Subject: [PATCH 27/31] cls_matchall: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_matchall.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index c33f711b9019..3684153cd8a9 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __mall_destroy(struct cls_mall_head *head)
+{
+	tcf_exts_destroy(&head->exts);
+	tcf_exts_put_net(&head->exts);
+	kfree(head);
+}
+
 static void mall_destroy_work(struct work_struct *work)
 {
 	struct cls_mall_head *head = container_of(work, struct cls_mall_head,
 						  work);
 	rtnl_lock();
-	tcf_exts_destroy(&head->exts);
-	kfree(head);
+	__mall_destroy(head);
 	rtnl_unlock();
 }
 
@@ -109,7 +115,10 @@ static void mall_destroy(struct tcf_proto *tp)
 	if (tc_should_offload(dev, head->flags))
 		mall_destroy_hw_filter(tp, head, (unsigned long) head);
 
-	call_rcu(&head->rcu, mall_destroy_rcu);
+	if (tcf_exts_get_net(&head->exts))
+		call_rcu(&head->rcu, mall_destroy_rcu);
+	else
+		__mall_destroy(head);
 }
 
 static void *mall_get(struct tcf_proto *tp, u32 handle)

From 3fd51de5e3ba447624a08a8ba29f90d94f0fe909 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:27 -0800
Subject: [PATCH 28/31] cls_route: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_route.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 4b14ccd8b8f2..ac9a5b8825b9 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __route4_delete_filter(struct route4_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
 static void route4_delete_filter_work(struct work_struct *work)
 {
 	struct route4_filter *f = container_of(work, struct route4_filter, work);
 
 	rtnl_lock();
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	__route4_delete_filter(f);
 	rtnl_unlock();
 }
 
@@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp)
 					next = rtnl_dereference(f->next);
 					RCU_INIT_POINTER(b->ht[h2], next);
 					tcf_unbind_filter(tp, &f->res);
-					call_rcu(&f->rcu, route4_delete_filter);
+					if (tcf_exts_get_net(&f->exts))
+						call_rcu(&f->rcu, route4_delete_filter);
+					else
+						__route4_delete_filter(f);
 				}
 			}
 			RCU_INIT_POINTER(head->table[h1], NULL);
@@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
 
 			/* Delete it */
 			tcf_unbind_filter(tp, &f->res);
+			tcf_exts_get_net(&f->exts);
 			call_rcu(&f->rcu, route4_delete_filter);
 
 			/* Strip RTNL protected tree */
@@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	*arg = f;
 	if (fold) {
 		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, route4_delete_filter);
 	}
 	return 0;

From 96585063a27f0704dcf7a09f8b78edd6a8973965 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:28 -0800
Subject: [PATCH 29/31] cls_rsvp: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_rsvp.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index bdbc541787f8..cf325625c99d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp)
 	return -ENOBUFS;
 }
 
+static void __rsvp_delete_filter(struct rsvp_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
 static void rsvp_delete_filter_work(struct work_struct *work)
 {
 	struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
 
 	rtnl_lock();
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	__rsvp_delete_filter(f);
 	rtnl_unlock();
 }
 
@@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 	 * grace period, since converted-to-rcu actions are relying on that
 	 * in cleanup() callback
 	 */
-	call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+	if (tcf_exts_get_net(&f->exts))
+		call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+	else
+		__rsvp_delete_filter(f);
 }
 
 static void rsvp_destroy(struct tcf_proto *tp)

From f2b751053ee9314e82c178f6ca0fee7e160fac95 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:29 -0800
Subject: [PATCH 30/31] cls_tcindex: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_tcindex.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index beaa95e09c25..a76937ee0b2d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -139,13 +139,19 @@ static int tcindex_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
+{
+	tcf_exts_destroy(&r->exts);
+	tcf_exts_put_net(&r->exts);
+}
+
 static void tcindex_destroy_rexts_work(struct work_struct *work)
 {
 	struct tcindex_filter_result *r;
 
 	r = container_of(work, struct tcindex_filter_result, work);
 	rtnl_lock();
-	tcf_exts_destroy(&r->exts);
+	__tcindex_destroy_rexts(r);
 	rtnl_unlock();
 }
 
@@ -158,14 +164,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head)
 	tcf_queue_work(&r->work);
 }
 
+static void __tcindex_destroy_fexts(struct tcindex_filter *f)
+{
+	tcf_exts_destroy(&f->result.exts);
+	tcf_exts_put_net(&f->result.exts);
+	kfree(f);
+}
+
 static void tcindex_destroy_fexts_work(struct work_struct *work)
 {
 	struct tcindex_filter *f = container_of(work, struct tcindex_filter,
 						work);
 
 	rtnl_lock();
-	tcf_exts_destroy(&f->result.exts);
-	kfree(f);
+	__tcindex_destroy_fexts(f);
 	rtnl_unlock();
 }
 
@@ -210,10 +222,17 @@ found:
 	 * grace period, since converted-to-rcu actions are relying on that
 	 * in cleanup() callback
 	 */
-	if (f)
-		call_rcu(&f->rcu, tcindex_destroy_fexts);
-	else
-		call_rcu(&r->rcu, tcindex_destroy_rexts);
+	if (f) {
+		if (tcf_exts_get_net(&f->result.exts))
+			call_rcu(&f->rcu, tcindex_destroy_fexts);
+		else
+			__tcindex_destroy_fexts(f);
+	} else {
+		if (tcf_exts_get_net(&r->exts))
+			call_rcu(&r->rcu, tcindex_destroy_rexts);
+		else
+			__tcindex_destroy_rexts(r);
+	}
 
 	*last = false;
 	return 0;

From 35c55fc156d85a396a975fc17636f560fc02fd65 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Nov 2017 13:47:30 -0800
Subject: [PATCH 31/31] cls_u32: use tcf_exts_get_net() before call_rcu()

Hold netns refcnt before call_rcu() and release it after
the tcf_exts_destroy() is done.

Note, on ->destroy() path we have to respect the return value
of tcf_exts_get_net(), on other paths it should always return
true, so we don't need to care.

Cc: Lucas Bates <lucasb@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_u32.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index dadd1b344497..b58eccb21f03 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
 			   bool free_pf)
 {
 	tcf_exts_destroy(&n->exts);
+	tcf_exts_put_net(&n->exts);
 	if (n->ht_down)
 		n->ht_down->refcnt--;
 #ifdef CONFIG_CLS_U32_PERF
@@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 				RCU_INIT_POINTER(*kp, key->next);
 
 				tcf_unbind_filter(tp, &key->res);
+				tcf_exts_get_net(&key->exts);
 				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
 				return 0;
 			}
@@ -588,7 +590,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 					 rtnl_dereference(n->next));
 			tcf_unbind_filter(tp, &n->res);
 			u32_remove_hw_knode(tp, n->handle);
-			call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+			if (tcf_exts_get_net(&n->exts))
+				call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+			else
+				u32_destroy_key(n->tp, n, true);
 		}
 	}
 }
@@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 
 		u32_replace_knode(tp, tp_c, new);
 		tcf_unbind_filter(tp, &n->res);
+		tcf_exts_get_net(&n->exts);
 		call_rcu(&n->rcu, u32_delete_key_rcu);
 		return 0;
 	}