From 8f67c90ee9148eab3d2b4393c3cf76489b27f87c Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 22 Feb 2019 12:33:25 +0100
Subject: [PATCH 01/44] net/sched: act_ipt: fix refcount leak when replace
 fails

After commit 4e8ddd7f1758 ("net: sched: don't release reference on action
overwrite"), the error path of all actions was converted to drop refcount
also when the action was being overwritten. But we forgot act_ipt_init(),
in case allocation of 'tname' was not successful:

 # tc action add action xt -j LOG --log-prefix hello index 100
 tablename: mangle hook: NF_IP_POST_ROUTING
         target:  LOG level warning prefix "hello" index 100
 # tc action show action xt
 total acts 1

         action order 0: tablename: mangle  hook: NF_IP_POST_ROUTING
         target  LOG level warning prefix "hello"
         index 100 ref 1 bind 0
 # tc action replace action xt -j LOG --log-prefix world index 100
 tablename: mangle hook: NF_IP_POST_ROUTING
         target:  LOG level warning prefix "world" index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action show action xt
 total acts 1

         action order 0: tablename: mangle  hook: NF_IP_POST_ROUTING
         target  LOG level warning prefix "hello"
         index 100 ref 2 bind 0

Ensure we call tcf_idr_release(), in case 'tname' allocation failed, also
when the action is being replaced.

Fixes: 4e8ddd7f1758 ("net: sched: don't release reference on action overwrite")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ipt.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8af6c11d2482..faa1addf89b3 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -199,8 +199,7 @@ err3:
 err2:
 	kfree(tname);
 err1:
-	if (ret == ACT_P_CREATED)
-		tcf_idr_release(*a, bind);
+	tcf_idr_release(*a, bind);
 	return err;
 }
 

From 6191da98062d25276a3b88fb2a94dcbcfb3ea65d Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 22 Feb 2019 12:33:26 +0100
Subject: [PATCH 02/44] net/sched: act_skbedit: fix refcount leak when replace
 fails

when act_skbedit was converted to use RCU in the data plane, we added an
error path, but we forgot to drop the action refcount in case of failure
during a 'replace' operation:

 # tc actions add action skbedit ptype otherhost pass index 100
 # tc action show action skbedit
 total acts 1

         action order 0: skbedit  ptype otherhost pass
          index 100 ref 1 bind 0
 # tc actions replace action skbedit ptype otherhost drop index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action show action skbedit
 total acts 1

         action order 0: skbedit  ptype otherhost pass
          index 100 ref 2 bind 0

Ensure we call tcf_idr_release(), in case 'params_new' allocation failed,
also when the action is being replaced.

Fixes: c749cdda9089 ("net/sched: act_skbedit: don't use spinlock in the data path")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_skbedit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 64dba3708fce..cfceed28c333 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -189,8 +189,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 

From cffde20164d2a1e7d647b63b4d8fac11bf48b500 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Fri, 22 Feb 2019 20:11:13 +0100
Subject: [PATCH 03/44] net: dsa: lantiq: Add GPHY firmware files

This adds the file names of the FW files which this driver handles into
the module description.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/lantiq_gswip.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 693a67f45bef..ddc1f9ca8ebc 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1162,6 +1162,12 @@ static struct platform_driver gswip_driver = {
 
 module_platform_driver(gswip_driver);
 
+MODULE_FIRMWARE("lantiq/xrx300_phy11g_a21.bin");
+MODULE_FIRMWARE("lantiq/xrx300_phy22f_a21.bin");
+MODULE_FIRMWARE("lantiq/xrx200_phy11g_a14.bin");
+MODULE_FIRMWARE("lantiq/xrx200_phy11g_a22.bin");
+MODULE_FIRMWARE("lantiq/xrx200_phy22f_a14.bin");
+MODULE_FIRMWARE("lantiq/xrx200_phy22f_a22.bin");
 MODULE_AUTHOR("Hauke Mehrtens <hauke@hauke-m.de>");
 MODULE_DESCRIPTION("Lantiq / Intel GSWIP driver");
 MODULE_LICENSE("GPL v2");

From 71828b2240692cec0e68b8d867bc00e1745e7fae Mon Sep 17 00:00:00 2001
From: Timur Celik <mail@timurcelik.de>
Date: Sat, 23 Feb 2019 12:53:13 +0100
Subject: [PATCH 04/44] tun: fix blocking read

This patch moves setting of the current state into the loop. Otherwise
the task may end up in a busy wait loop if none of the break conditions
are met.

Signed-off-by: Timur Celik <mail@timurcelik.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed298c0cb39..d291762b9e9d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2167,9 +2167,9 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 	}
 
 	add_wait_queue(&tfile->wq.wait, &wait);
-	current->state = TASK_INTERRUPTIBLE;
 
 	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
 		ptr = ptr_ring_consume(&tfile->tx_ring);
 		if (ptr)
 			break;
@@ -2185,7 +2185,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 		schedule();
 	}
 
-	current->state = TASK_RUNNING;
+	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&tfile->wq.wait, &wait);
 
 out:

From 9919a363a5cb57c2b64c4803b4d2dd45e90bf230 Mon Sep 17 00:00:00 2001
From: Wen Yang <wen.yang99@zte.com.cn>
Date: Mon, 25 Feb 2019 15:22:19 +0800
Subject: [PATCH 05/44] net: dsa: fix a leaked reference by adding missing
 of_node_put

The call to of_parse_phandle returns a node pointer with refcount
incremented thus it must be explicitly decremented after the last
usage.

Detected by coccinelle with the following warnings:
./net/dsa/port.c:294:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 284, but without a corresponding object release within this function.
./net/dsa/dsa2.c:627:3-9: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 618, but without a corresponding object release within this function.
./net/dsa/dsa2.c:630:3-9: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 618, but without a corresponding object release within this function.
./net/dsa/dsa2.c:636:3-9: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 618, but without a corresponding object release within this function.
./net/dsa/dsa2.c:639:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 618, but without a corresponding object release within this function.

Signed-off-by: Wen Yang <wen.yang99@zte.com.cn>
Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 16 ++++++++++------
 net/dsa/port.c |  1 +
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index a1917025e155..410f19148106 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -612,8 +612,8 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
 {
 	struct device_node *ports, *port;
 	struct dsa_port *dp;
+	int err = 0;
 	u32 reg;
-	int err;
 
 	ports = of_get_child_by_name(dn, "ports");
 	if (!ports) {
@@ -624,19 +624,23 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
 	for_each_available_child_of_node(ports, port) {
 		err = of_property_read_u32(port, "reg", &reg);
 		if (err)
-			return err;
+			goto out_put_node;
 
-		if (reg >= ds->num_ports)
-			return -EINVAL;
+		if (reg >= ds->num_ports) {
+			err = -EINVAL;
+			goto out_put_node;
+		}
 
 		dp = &ds->ports[reg];
 
 		err = dsa_port_parse_of(dp, port);
 		if (err)
-			return err;
+			goto out_put_node;
 	}
 
-	return 0;
+out_put_node:
+	of_node_put(ports);
+	return err;
 }
 
 static int dsa_switch_parse_member_of(struct dsa_switch *ds,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2a2a878b5ce3..c2261697ee83 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -292,6 +292,7 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
 		return ERR_PTR(-EPROBE_DEFER);
 	}
 
+	of_node_put(phy_dn);
 	return phydev;
 }
 

From a3df633a3c92bb96b06552c3f828d7c267774379 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 25 Feb 2019 17:28:27 +0200
Subject: [PATCH 06/44] net: sched: act_tunnel_key: fix NULL pointer
 dereference during init

Metadata pointer is only initialized for action TCA_TUNNEL_KEY_ACT_SET, but
it is unconditionally dereferenced in tunnel_key_init() error handler.
Verify that metadata pointer is not NULL before dereferencing it in
tunnel_key_init error handling code.

Fixes: ee28bb56ac5b ("net/sched: fix memory leak in act_tunnel_key_init()")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 8b43fe0130f7..3f943de9a2c9 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -377,7 +377,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	return ret;
 
 release_tun_meta:
-	dst_release(&metadata->dst);
+	if (metadata)
+		dst_release(&metadata->dst);
 
 err_out:
 	if (exists)

From ff7b11aa481f682e0e9711abfeb7d03f5cd612bf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 21 Feb 2019 14:13:56 -0800
Subject: [PATCH 07/44] net: socket: set sock->sk to NULL after calling
 proto_ops::release()

Commit 9060cb719e61 ("net: crypto set sk to NULL when af_alg_release.")
fixed a use-after-free in sockfs_setattr() when an AF_ALG socket is
closed concurrently with fchownat().  However, it ignored that many
other proto_ops::release() methods don't set sock->sk to NULL and
therefore allow the same use-after-free:

    - base_sock_release
    - bnep_sock_release
    - cmtp_sock_release
    - data_sock_release
    - dn_release
    - hci_sock_release
    - hidp_sock_release
    - iucv_sock_release
    - l2cap_sock_release
    - llcp_sock_release
    - llc_ui_release
    - rawsock_release
    - rfcomm_sock_release
    - sco_sock_release
    - svc_release
    - vcc_release
    - x25_release

Rather than fixing all these and relying on every socket type to get
this right forever, just make __sock_release() set sock->sk to NULL
itself after calling proto_ops::release().

Reproducer that produces the KASAN splat when any of these socket types
are configured into the kernel:

    #include <pthread.h>
    #include <stdlib.h>
    #include <sys/socket.h>
    #include <unistd.h>

    pthread_t t;
    volatile int fd;

    void *close_thread(void *arg)
    {
        for (;;) {
            usleep(rand() % 100);
            close(fd);
        }
    }

    int main()
    {
        pthread_create(&t, NULL, close_thread, NULL);
        for (;;) {
            fd = socket(rand() % 50, rand() % 11, 0);
            fchownat(fd, "", 1000, 1000, 0x1000);
            close(fd);
        }
    }

Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/socket.c b/net/socket.c
index d80d87a395ea..320f51b22b19 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -577,6 +577,7 @@ static void __sock_release(struct socket *sock, struct inode *inode)
 		if (inode)
 			inode_lock(inode);
 		sock->ops->release(sock);
+		sock->sk = NULL;
 		if (inode)
 			inode_unlock(inode);
 		sock->ops = NULL;

From ecef67cb10db7b83b3b71c61dbb29aa070ab0112 Mon Sep 17 00:00:00 2001
From: Timur Celik <mail@timurcelik.de>
Date: Mon, 25 Feb 2019 21:13:13 +0100
Subject: [PATCH 08/44] tun: remove unnecessary memory barrier

Replace set_current_state with __set_current_state since no memory
barrier is needed at this point.

Signed-off-by: Timur Celik <mail@timurcelik.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d291762b9e9d..53f4f37b0ffd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2185,7 +2185,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 		schedule();
 	}
 
-	set_current_state(TASK_RUNNING);
+	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&tfile->wq.wait, &wait);
 
 out:

From 9ef6b42ad6fd7929dd1b6092cb02014e382c6a91 Mon Sep 17 00:00:00 2001
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Mon, 25 Feb 2019 19:24:15 +0300
Subject: [PATCH 09/44] net: Add __icmp_send helper.

Add __icmp_send function having ip_options struct parameter

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
Reviewed-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h | 9 ++++++++-
 net/ipv4/icmp.c    | 7 ++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6ac3a5bd0117..e0f709d26dde 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -22,6 +22,7 @@
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
 
 struct icmp_err {
   int		errno;
@@ -39,7 +40,13 @@ struct net_proto_family;
 struct sk_buff;
 struct net;
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt);
+static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+}
+
 int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 065997f414e6..3f24414150e2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -570,7 +570,8 @@ relookup_failed:
  *			MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt)
 {
 	struct iphdr *iph;
 	int room;
@@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
 		goto out_unlock;
 
 
@@ -742,7 +743,7 @@ out_bh_enable:
 	local_bh_enable();
 out:;
 }
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
 
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)

From 3da1ed7ac398f34fff1694017a07054d69c5f5c5 Mon Sep 17 00:00:00 2001
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Mon, 25 Feb 2019 19:27:15 +0300
Subject: [PATCH 10/44] net: avoid use IPCB in cipso_v4_error

Extract IP options in cipso_v4_error and use __icmp_send.

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      |  2 ++
 net/ipv4/cipso_ipv4.c | 17 +++++++++++++++--
 net/ipv4/ip_options.c | 22 +++++++++++++++++-----
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 8866bfce6121..f0e8d064e249 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -667,6 +667,8 @@ static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
 }
 
 void ip_options_fragment(struct sk_buff *skb);
+int __ip_options_compile(struct net *net, struct ip_options *opt,
+			 struct sk_buff *skb, __be32 *info);
 int ip_options_compile(struct net *net, struct ip_options *opt,
 		       struct sk_buff *skb);
 int ip_options_get(struct net *net, struct ip_options_rcu **optp,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b7fb13..eff86a71c1b0 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1735,13 +1735,26 @@ validate_return:
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
+	/*
+	 * We might be called above the IP layer,
+	 * so we can not use icmp_send and IPCB here.
+	 */
+
+	memset(opt, 0, sizeof(struct ip_options));
+	opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+	if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+		return;
+
 	if (gateway)
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
 	else
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
 }
 
 /**
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ed194d46c00e..32a35043c9f5 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb)
  * If opt == NULL, then skb->data should point to IP header.
  */
 
-int ip_options_compile(struct net *net,
-		       struct ip_options *opt, struct sk_buff *skb)
+int __ip_options_compile(struct net *net,
+			 struct ip_options *opt, struct sk_buff *skb,
+			 __be32 *info)
 {
 	__be32 spec_dst = htonl(INADDR_ANY);
 	unsigned char *pp_ptr = NULL;
@@ -468,11 +469,22 @@ eol:
 		return 0;
 
 error:
-	if (skb) {
-		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
-	}
+	if (info)
+		*info = htonl((pp_ptr-iph)<<24);
 	return -EINVAL;
 }
+
+int ip_options_compile(struct net *net,
+		       struct ip_options *opt, struct sk_buff *skb)
+{
+	int ret;
+	__be32 info;
+
+	ret = __ip_options_compile(net, opt, skb, &info);
+	if (ret != 0 && skb)
+		icmp_send(skb, ICMP_PARAMETERPROB, 0, info);
+	return ret;
+}
 EXPORT_SYMBOL(ip_options_compile);
 
 /*

From 781e62823cb81b972dc8652c1827205cda2ac9ac Mon Sep 17 00:00:00 2001
From: Peng Sun <sironhide0null@gmail.com>
Date: Tue, 26 Feb 2019 22:15:37 +0800
Subject: [PATCH 11/44] bpf: decrease usercnt if bpf_map_new_fd() fails in
 bpf_map_get_fd_by_id()

In bpf/syscall.c, bpf_map_get_fd_by_id() use bpf_map_inc_not_zero()
to increase the refcount, both map->refcnt and map->usercnt. Then, if
bpf_map_new_fd() fails, should handle map->usercnt too.

Fixes: bd5f5f4ecb78 ("bpf: Add BPF_MAP_GET_FD_BY_ID")
Signed-off-by: Peng Sun <sironhide0null@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8577bb7f8be6..f98328abe8fa 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1986,7 +1986,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 
 	fd = bpf_map_new_fd(map, f_flags);
 	if (fd < 0)
-		bpf_map_put(map);
+		bpf_map_put_with_uref(map);
 
 	return fd;
 }

From b6e9e5df4ecf100f6a10ab2ade8e46d47a4b9779 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 26 Feb 2019 09:00:02 -0800
Subject: [PATCH 12/44] ipv4: Return error for RTA_VIA attribute

IPv4 currently does not support nexthops outside of the AF_INET family.
Specifically, it does not handle RTA_VIA attribute. If it is passed
in a route add request, the actual route added only uses the device
which is clearly not what the user intended:

  $ ip ro add 172.16.1.0/24 via inet6 2001:db8:1::1 dev eth0
  $ ip ro ls
  ...
  172.16.1.0/24 dev eth0

Catch this and fail the route add:
  $ ip ro add 172.16.1.0/24 via inet6 2001:db8:1::1 dev eth0
  Error: IPv4 does not support RTA_VIA attribute.

Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index fe4f6a624238..ed14ec245584 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -710,6 +710,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 		case RTA_GATEWAY:
 			cfg->fc_gw = nla_get_be32(attr);
 			break;
+		case RTA_VIA:
+			NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
+			err = -EINVAL;
+			goto errout;
 		case RTA_PRIORITY:
 			cfg->fc_priority = nla_get_u32(attr);
 			break;

From e3818541b49fb88650ba339d33cc53e4095da5b3 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 26 Feb 2019 09:00:03 -0800
Subject: [PATCH 13/44] ipv6: Return error for RTA_VIA attribute

IPv6 currently does not support nexthops outside of the AF_INET6 family.
Specifically, it does not handle RTA_VIA attribute. If it is passed
in a route add request, the actual route added only uses the device
which is clearly not what the user intended:

  $ ip -6 ro add 2001:db8:2::/64 via inet 172.16.1.1 dev eth0
  $ ip ro ls
  ...
  2001:db8:2::/64 dev eth0 metric 1024 pref medium

Catch this and fail the route add:
  $ ip -6 ro add 2001:db8:2::/64 via inet 172.16.1.1 dev eth0
  Error: IPv6 does not support RTA_VIA attribute.

Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ce15dc4ccbfa..b7a620023a52 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4182,6 +4182,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
 		cfg->fc_flags |= RTF_GATEWAY;
 	}
+	if (tb[RTA_VIA]) {
+		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
+		goto errout;
+	}
 
 	if (tb[RTA_DST]) {
 		int plen = (rtm->rtm_dst_len + 7) >> 3;

From be48220edd48ca0d569782992840488a52373a24 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 26 Feb 2019 09:00:04 -0800
Subject: [PATCH 14/44] mpls: Return error for RTA_GATEWAY attribute

MPLS does not support nexthops with an MPLS address family.
Specifically, it does not handle RTA_GATEWAY attribute. Make it
clear by returning an error.

Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7d55d4c04088..fa763e2e50ec 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1838,6 +1838,9 @@ static int rtm_to_route_config(struct sk_buff *skb,
 				goto errout;
 			break;
 		}
+		case RTA_GATEWAY:
+			NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute");
+			goto errout;
 		case RTA_VIA:
 		{
 			if (nla_get_via(nla, &cfg->rc_via_alen,

From bf48648d650db1146b75b9bd358502431e86cf4f Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Fri, 22 Feb 2019 18:25:03 +0000
Subject: [PATCH 15/44] hv_netvsc: Fix IP header checksum for coalesced packets

Incoming packets may have IP header checksum verified by the host.
They may not have IP header checksum computed after coalescing.
This patch re-compute the checksum when necessary, otherwise the
packets may be dropped, because Linux network stack always checks it.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 256adbd044f5..cf4897043e83 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -744,6 +744,14 @@ void netvsc_linkstatus_callback(struct net_device *net,
 	schedule_delayed_work(&ndev_ctx->dwork, 0);
 }
 
+static void netvsc_comp_ipcsum(struct sk_buff *skb)
+{
+	struct iphdr *iph = (struct iphdr *)skb->data;
+
+	iph->check = 0;
+	iph->check = ip_fast_csum(iph, iph->ihl);
+}
+
 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 					     struct netvsc_channel *nvchan)
 {
@@ -770,9 +778,17 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 	/* skb is already created with CHECKSUM_NONE */
 	skb_checksum_none_assert(skb);
 
-	/*
-	 * In Linux, the IP checksum is always checked.
-	 * Do L4 checksum offload if enabled and present.
+	/* Incoming packets may have IP header checksum verified by the host.
+	 * They may not have IP header checksum computed after coalescing.
+	 * We compute it here if the flags are set, because on Linux, the IP
+	 * checksum is always checked.
+	 */
+	if (csum_info && csum_info->receive.ip_checksum_value_invalid &&
+	    csum_info->receive.ip_checksum_succeeded &&
+	    skb->protocol == htons(ETH_P_IP))
+		netvsc_comp_ipcsum(skb);
+
+	/* Do L4 checksum offload if enabled and present.
 	 */
 	if (csum_info && (net->features & NETIF_F_RXCSUM)) {
 		if (csum_info->receive.tcp_checksum_succeeded ||

From bfd07f3dd4f111b884d7922b37eb239280f83d8c Mon Sep 17 00:00:00 2001
From: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Date: Mon, 25 Feb 2019 10:57:20 +0700
Subject: [PATCH 16/44] tipc: fix race condition causing hung sendto

When sending multicast messages via blocking socket,
if sending link is congested (tsk->cong_link_cnt is set to 1),
the sending thread will be put into sleeping state. However,
tipc_sk_filter_rcv() is called under socket spin lock but
tipc_wait_for_cond() is not. So, there is no guarantee that
the setting of tsk->cong_link_cnt to 0 in tipc_sk_proto_rcv() in
CPU-1 will be perceived by CPU-0. If that is the case, the sending
thread in CPU-0 after being waken up, will continue to see
tsk->cong_link_cnt as 1 and put the sending thread into sleeping
state again. The sending thread will sleep forever.

CPU-0                                | CPU-1
tipc_wait_for_cond()                 |
{                                    |
 // condition_ = !tsk->cong_link_cnt |
 while ((rc_ = !(condition_))) {     |
  ...                                |
  release_sock(sk_);                 |
  wait_woken();                      |
                                     | if (!sock_owned_by_user(sk))
                                     |  tipc_sk_filter_rcv()
                                     |  {
                                     |   ...
                                     |   tipc_sk_proto_rcv()
                                     |   {
                                     |    ...
                                     |    tsk->cong_link_cnt--;
                                     |    ...
                                     |    sk->sk_write_space(sk);
                                     |    ...
                                     |   }
                                     |   ...
                                     |  }
  sched_annotate_sleep();            |
  lock_sock(sk_);                    |
  remove_wait_queue();               |
 }                                   |
}                                    |

This commit fixes it by adding memory barrier to tipc_sk_proto_rcv()
and tipc_wait_for_cond().

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 684f2125fc6b..70343ac448b1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -379,11 +379,13 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
 
 #define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
 ({                                                                             \
+	DEFINE_WAIT_FUNC(wait_, woken_wake_function);                          \
 	struct sock *sk_;						       \
 	int rc_;							       \
 									       \
 	while ((rc_ = !(condition_))) {					       \
-		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
+		/* coupled with smp_wmb() in tipc_sk_proto_rcv() */            \
+		smp_rmb();                                                     \
 		sk_ = (sock_)->sk;					       \
 		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
 		if (rc_)						       \
@@ -1983,6 +1985,8 @@ static void tipc_sk_proto_rcv(struct sock *sk,
 		return;
 	case SOCK_WAKEUP:
 		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+		/* coupled with smp_rmb() in tipc_wait_for_cond() */
+		smp_wmb();
 		tsk->cong_link_cnt--;
 		wakeup = true;
 		break;

From 2b3c6885386020b1b9d92d45e8349637e27d1f66 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Wed, 27 Feb 2019 03:58:53 -0500
Subject: [PATCH 17/44] bnxt_en: Drop oversize TX packets to prevent errors.

There have been reports of oversize UDP packets being sent to the
driver to be transmitted, causing error conditions.  The issue is
likely caused by the dst of the SKB switching between 'lo' with
64K MTU and the hardware device with a smaller MTU.  Patches are
being proposed by Mahesh Bandewar <maheshb@google.com> to fix the
issue.

In the meantime, add a quick length check in the driver to prevent
the error.  The driver uses the TX packet size as index to look up an
array to setup the TX BD.  The array is large enough to support all MTU
sizes supported by the driver.  The oversize TX packet causes the
driver to index beyond the array and put garbage values into the
TX BD.  Add a simple check to prevent this.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d95730c6e0f2..803f7990d32b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -500,6 +500,12 @@ normal_tx:
 	}
 
 	length >>= 9;
+	if (unlikely(length >= ARRAY_SIZE(bnxt_lhint_arr))) {
+		dev_warn_ratelimited(&pdev->dev, "Dropped oversize %d bytes TX packet.\n",
+				     skb->len);
+		i = 0;
+		goto tx_dma_error;
+	}
 	flags |= bnxt_lhint_arr[length];
 	txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
 

From f4d7b3e23d259c44f1f1c39645450680fcd935d6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 27 Feb 2019 13:37:26 +0300
Subject: [PATCH 18/44] net: dev: Use unsigned integer as an argument to
 left-shift

1 << 31 is Undefined Behaviour according to the C standard.
Use U type modifier to avoid theoretical overflow.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 86dbb3e29139..848b54b7ec91 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3861,7 +3861,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 	if (debug_value == 0)	/* no output */
 		return 0;
 	/* set low N bits */
-	return (1 << debug_value) - 1;
+	return (1U << debug_value) - 1;
 }
 
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)

From 287beb284f14796160be00db15f87ab3531715a2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 27 Feb 2019 13:45:35 +0300
Subject: [PATCH 19/44] enc28j60: Correct description of debug module parameter

The netif_msg_init() API takes on input the amount of bits to be set. The
description of debug parameter in the enc28j60 module is misleading in this
sense and passing 0xffff does not give an expected behaviour.

Fix the description of debug module parameter to show what exactly is expected.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/enc28j60.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index f6ecfa778660..8f72587b5a2c 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1681,5 +1681,5 @@ MODULE_DESCRIPTION(DRV_NAME " ethernet driver");
 MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>");
 MODULE_LICENSE("GPL");
 module_param_named(debug, debug.msg_enable, int, 0);
-MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., ffff=all)");
+MODULE_PARM_DESC(debug, "Debug verbosity level in amount of bits set (0=none, ..., 31=all)");
 MODULE_ALIAS("spi:" DRV_NAME);

From 232ba3a51cc224b339c7114888ed7f0d4d95695e Mon Sep 17 00:00:00 2001
From: Rajasingh Thavamani <T.Rajasingh@landisgyr.com>
Date: Wed, 27 Feb 2019 17:43:19 +0530
Subject: [PATCH 20/44] net: phy: Micrel KSZ8061: link failure after cable
 connect

With Micrel KSZ8061 PHY, the link may occasionally not come up after
Ethernet cable connect. The vendor's (Microchip, former Micrel) errata
sheet 80000688A.pdf descripes the problem and possible workarounds in
detail, see below.
The batch implements workaround 1, which permanently fixes the issue.

DESCRIPTION
Link-up may not occur properly when the Ethernet cable is initially
connected. This issue occurs more commonly when the cable is connected
slowly, but it may occur any time a cable is connected. This issue occurs
in the auto-negotiation circuit, and will not occur if auto-negotiation
is disabled (which requires that the two link partners be set to the
same speed and duplex).

END USER IMPLICATIONS
When this issue occurs, link is not established. Subsequent cable
plug/unplaug cycle will not correct the issue.

WORk AROUND
There are four approaches to work around this issue:
1. This issue can be prevented by setting bit 15 in MMD device address 1,
   register 2, prior to connecting the cable or prior to setting the
   Restart Auto-negotiation bit in register 0h. The MMD registers are
   accessed via the indirect access registers Dh and Eh, or via the Micrel
   EthUtil utility as shown here:
   . if using the EthUtil utility (usually with a Micrel KSZ8061
     Evaluation Board), type the following commands:
     > address 1
     > mmd 1
     > iw 2 b61a
   . Alternatively, write the following registers to write to the
     indirect MMD register:
     Write register Dh, data 0001h
     Write register Eh, data 0002h
     Write register Dh, data 4001h
     Write register Eh, data B61Ah
2. The issue can be avoided by disabling auto-negotiation in the KSZ8061,
   either by the strapping option, or by clearing bit 12 in register 0h.
   Care must be taken to ensure that the KSZ8061 and the link partner
   will link with the same speed and duplex. Note that the KSZ8061
   defaults to full-duplex when auto-negotiation is off, but other
   devices may default to half-duplex in the event of failed
   auto-negotiation.
3. The issue can be avoided by connecting the cable prior to powering-up
   or resetting the KSZ8061, and leaving it plugged in thereafter.
4. If the above measures are not taken and the problem occurs, link can
   be recovered by setting the Restart Auto-Negotiation bit in
   register 0h, or by resetting or power cycling the device. Reset may
   be either hardware reset or software reset (register 0h, bit 15).

PLAN
This errata will not be corrected in the future revision.

Fixes: 7ab59dc15e2f ("drivers/net/phy/micrel_phy: Add support for new PHYs")
Signed-off-by: Alexander Onnasch <alexander.onnasch@landisgyr.com>
Signed-off-by: Rajasingh Thavamani <T.Rajasingh@landisgyr.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index b1f959935f50..b7df0295a3ca 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -344,6 +344,17 @@ static int ksz8041_config_aneg(struct phy_device *phydev)
 	return genphy_config_aneg(phydev);
 }
 
+static int ksz8061_config_init(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A);
+	if (ret)
+		return ret;
+
+	return kszphy_config_init(phydev);
+}
+
 static int ksz9021_load_values_from_of(struct phy_device *phydev,
 				       const struct device_node *of_node,
 				       u16 reg,
@@ -1040,7 +1051,7 @@ static struct phy_driver ksphy_driver[] = {
 	.name		= "Micrel KSZ8061",
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.features	= PHY_BASIC_FEATURES,
-	.config_init	= kszphy_config_init,
+	.config_init	= ksz8061_config_init,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
 	.suspend	= genphy_suspend,

From 58bdd544e2933a21a51eecf17c3f5f94038261b5 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 22 Feb 2019 15:37:58 +0800
Subject: [PATCH 21/44] net: nfc: Fix NULL dereference on nfc_llcp_build_tlv
 fails

KASAN report this:

BUG: KASAN: null-ptr-deref in nfc_llcp_build_gb+0x37f/0x540 [nfc]
Read of size 3 at addr 0000000000000000 by task syz-executor.0/5401

CPU: 0 PID: 5401 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xfa/0x1ce lib/dump_stack.c:113
 kasan_report+0x171/0x18d mm/kasan/report.c:321
 memcpy+0x1f/0x50 mm/kasan/common.c:130
 nfc_llcp_build_gb+0x37f/0x540 [nfc]
 nfc_llcp_register_device+0x6eb/0xb50 [nfc]
 nfc_register_device+0x50/0x1d0 [nfc]
 nfcsim_device_new+0x394/0x67d [nfcsim]
 ? 0xffffffffc1080000
 nfcsim_init+0x6b/0x1000 [nfcsim]
 do_one_initcall+0xfa/0x5ca init/main.c:887
 do_init_module+0x204/0x5f6 kernel/module.c:3460
 load_module+0x66b2/0x8570 kernel/module.c:3808
 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902
 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462e99
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f9cb79dcc58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99
RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003
RBP: 00007f9cb79dcc70 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9cb79dd6bc
R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004

nfc_llcp_build_tlv will return NULL on fails, caller should check it,
otherwise will trigger a NULL dereference.

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: eda21f16a5ed ("NFC: Set MIU and RW values from CONNECT and CC LLCP frames")
Fixes: d646960f7986 ("NFC: Initial LLCP support")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/llcp_commands.c | 20 ++++++++++++++++++++
 net/nfc/llcp_core.c     | 24 ++++++++++++++++++++----
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 6a196e438b6c..d1fc019e932e 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -419,6 +419,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
 						      sock->service_name,
 						      sock->service_name_len,
 						      &service_name_tlv_length);
+		if (!service_name_tlv) {
+			err = -ENOMEM;
+			goto error_tlv;
+		}
 		size += service_name_tlv_length;
 	}
 
@@ -429,9 +433,17 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
 				      &miux_tlv_length);
+	if (!miux_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += miux_tlv_length;
 
 	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
+	if (!rw_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += rw_tlv_length;
 
 	pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
@@ -484,9 +496,17 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
 				      &miux_tlv_length);
+	if (!miux_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += miux_tlv_length;
 
 	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
+	if (!rw_tlv) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
 	size += rw_tlv_length;
 
 	skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index ef4026a23e80..4fa015208aab 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -532,10 +532,10 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local)
 
 static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
 {
-	u8 *gb_cur, *version_tlv, version, version_length;
-	u8 *lto_tlv, lto_length;
-	u8 *wks_tlv, wks_length;
-	u8 *miux_tlv, miux_length;
+	u8 *gb_cur, version, version_length;
+	u8 lto_length, wks_length, miux_length;
+	u8 *version_tlv = NULL, *lto_tlv = NULL,
+	   *wks_tlv = NULL, *miux_tlv = NULL;
 	__be16 wks = cpu_to_be16(local->local_wks);
 	u8 gb_len = 0;
 	int ret = 0;
@@ -543,17 +543,33 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
 	version = LLCP_VERSION_11;
 	version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version,
 					 1, &version_length);
+	if (!version_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += version_length;
 
 	lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, &lto_length);
+	if (!lto_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += lto_length;
 
 	pr_debug("Local wks 0x%lx\n", local->local_wks);
 	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length);
+	if (!wks_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += wks_length;
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0,
 				      &miux_length);
+	if (!miux_tlv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	gb_len += miux_length;
 
 	gb_len += ARRAY_SIZE(llcp_magic);

From 72a7d452b0f09dcd5d3e18cff1e3839f8b1acc1f Mon Sep 17 00:00:00 2001
From: Max Uvarov <muvarov@gmail.com>
Date: Mon, 25 Feb 2019 12:15:10 +0300
Subject: [PATCH 22/44] net: phy: dp83867: add soft reset delay

Similar to dp83640 delay after soft reset
is needed to set up registers correctly.

Signed-off-by: Max Uvarov <muvarov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83867.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index da6a67d47ce9..56fa3606cb9c 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/phy.h>
+#include <linux/delay.h>
 
 #include <dt-bindings/net/ti-dp83867.h>
 
@@ -325,6 +326,8 @@ static int dp83867_phy_reset(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
+	usleep_range(10, 20);
+
 	return dp83867_config_init(phydev);
 }
 

From 651eb32e569e50564803891cf2a8d22e49e979a5 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 25 Feb 2019 16:08:36 +0100
Subject: [PATCH 23/44] selftests: pmtu: disable DAD in all namespaces

Otherwise, the configured IPv6 address could be still "tentative"
at test time, possibly causing tests failures.
We can also drop some sleep along the code and decrease the
timeout for most commands so that the test runtime decreases.

v1 -> v2:
 - fix comment (Stefano)

Fixes: d1f1b9cbf34c ("selftests: net: Introduce first PMTU test")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index e2c94e47707c..89aec2fdf4fa 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -263,8 +263,6 @@ setup_fou_or_gue() {
 
 	${ns_a} ip link set ${encap}_a up
 	${ns_b} ip link set ${encap}_b up
-
-	sleep 1
 }
 
 setup_fou44() {
@@ -302,6 +300,10 @@ setup_gue66() {
 setup_namespaces() {
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns add ${n} || return 1
+
+		# Disable DAD, so that we don't have to wait to use the
+		# configured IPv6 addresses
+		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
 	done
 }
 
@@ -337,8 +339,6 @@ setup_vti() {
 
 	${ns_a} ip link set vti${proto}_a up
 	${ns_b} ip link set vti${proto}_b up
-
-	sleep 1
 }
 
 setup_vti4() {
@@ -375,8 +375,6 @@ setup_vxlan_or_geneve() {
 
 	${ns_a} ip link set ${type}_a up
 	${ns_b} ip link set ${type}_b up
-
-	sleep 1
 }
 
 setup_geneve4() {
@@ -588,8 +586,8 @@ test_pmtu_ipvX() {
 	mtu "${ns_b}"  veth_B-R2 1500
 
 	# Create route exceptions
-	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
-	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null
 
 	# Check that exceptions have been created with the correct PMTU
 	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
@@ -621,7 +619,7 @@ test_pmtu_ipvX() {
 	# Decrease remote MTU on path via R2, get new exception
 	mtu "${ns_r2}" veth_R2-B 400
 	mtu "${ns_b}"  veth_B-R2 400
-	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
 	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
 	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
 
@@ -638,7 +636,7 @@ test_pmtu_ipvX() {
 	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
 
 	# Get new exception
-	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
 	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
 	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
 }
@@ -687,7 +685,7 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 
 	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
 	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
-	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
 
 	# Check that exception was created
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
@@ -767,7 +765,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
 
 	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
 	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
-	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
 
 	# Check that exception was created
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
@@ -825,13 +823,13 @@ test_pmtu_vti4_exception() {
 
 	# Send DF packet without exceeding link layer MTU, check that no
 	# exception is created
-	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null
+	${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
 	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
 
 	# Now exceed link layer MTU by one byte, check that exception is created
 	# with the right PMTU value
-	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null
+	${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
 	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
 }
@@ -847,7 +845,7 @@ test_pmtu_vti6_exception() {
 	mtu "${ns_b}" veth_b 4000
 	mtu "${ns_a}" vti6_a 5000
 	mtu "${ns_b}" vti6_b 5000
-	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${tunnel6_b_addr} > /dev/null
+	${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null
 
 	# Check that exception was created
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"

From b3cc4f8a8a411bbf07a7d7213ce9c7571ded38b9 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 25 Feb 2019 16:08:37 +0100
Subject: [PATCH 24/44] selftests: pmtu: add explicit tests for PMTU exceptions
 cleanup

Add a couple of new tests, explicitly checking that the kernel
timely releases PMTU exceptions on related device removal.
This is mostly a regression test vs the issue fixed by
commit f5b51fe804ec ("ipv6: route: purge exception on removal")

Only 2 new test cases have been added, instead of extending all
the existing ones, because the reproducer requires executing
several commands and would slow down too much the tests otherwise.

v2 -> v3:
 - more cleanup, still from Stefano

v1 -> v2:
 - several script cleanups, as suggested by Stefano

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 68 ++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 89aec2fdf4fa..912b2dc50be3 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -103,6 +103,15 @@
 #	and check that configured MTU is used on link creation and changes, and
 #	that MTU is properly calculated instead when MTU is not configured from
 #	userspace
+#
+# - cleanup_ipv4_exception
+#	Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
+#	exceptions on multiple CPUs and check that the veth device tear-down
+# 	happens in a timely manner
+#
+# - cleanup_ipv6_exception
+#	Same as above, but use IPv6 transport from A to B
+
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
@@ -135,7 +144,9 @@ tests="
 	pmtu_vti6_default_mtu		vti6: default MTU assignment
 	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
 	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
-	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"
+	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes
+	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
+	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
@@ -1006,6 +1017,61 @@ test_pmtu_vti6_link_change_mtu() {
 	return ${fail}
 }
 
+check_command() {
+	cmd=${1}
+
+	if ! which ${cmd} > /dev/null 2>&1; then
+		err "  missing required command: '${cmd}'"
+		return 1
+	fi
+	return 0
+}
+
+test_cleanup_vxlanX_exception() {
+	outer="${1}"
+	encap="vxlan"
+	ll_mtu=4000
+
+	check_command taskset || return 2
+	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
+
+	setup namespaces routing ${encap}${outer} || return 2
+	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
+	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
+	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
+
+	# Fill exception cache for multiple CPUs (2)
+	# we can always use inner IPv4 for that
+	for cpu in ${cpu_list}; do
+		taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null
+	done
+
+	${ns_a} ip link del dev veth_A-R1 &
+	iplink_pid=$!
+	sleep 1
+	if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
+		err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
+		return 1
+	fi
+}
+
+test_cleanup_ipv6_exception() {
+	test_cleanup_vxlanX_exception 6
+}
+
+test_cleanup_ipv4_exception() {
+	test_cleanup_vxlanX_exception 4
+}
+
 usage() {
 	echo
 	echo "$0 [OPTIONS] [TEST]..."

From a1fd1ad2552fad9e649eeb85fd79301e2880a886 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 25 Feb 2019 13:55:48 -0800
Subject: [PATCH 25/44] ipv4: Pass original device to ip_rcv_finish_core

ip_route_input_rcu expects the original ingress device (e.g., for
proper multicast handling). The skb->dev can be changed by l3mdev_ip_rcv,
so dev needs to be saved prior to calling it. This was the behavior prior
to the listify changes.

Fixes: 5fa12739a53d0 ("net: ipv4: listify ip_rcv_finish")
Cc: Edward Cree <ecree@solarflare.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 51d8efba6de2..1f4737b77067 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -307,11 +307,10 @@ drop:
 }
 
 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
-			      struct sk_buff *skb)
+			      struct sk_buff *skb, struct net_device *dev)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int (*edemux)(struct sk_buff *skb);
-	struct net_device *dev = skb->dev;
 	struct rtable *rt;
 	int err;
 
@@ -400,6 +399,7 @@ drop_error:
 
 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dev;
 	int ret;
 
 	/* if ingress device is enslaved to an L3 master device pass the
@@ -409,7 +409,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (!skb)
 		return NET_RX_SUCCESS;
 
-	ret = ip_rcv_finish_core(net, sk, skb);
+	ret = ip_rcv_finish_core(net, sk, skb, dev);
 	if (ret != NET_RX_DROP)
 		ret = dst_input(skb);
 	return ret;
@@ -545,6 +545,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 
 	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
 		struct dst_entry *dst;
 
 		skb_list_del_init(skb);
@@ -554,7 +555,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 		skb = l3mdev_ip_rcv(skb);
 		if (!skb)
 			continue;
-		if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+		if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
 			continue;
 
 		dst = skb_dst(skb);

From 5578de4834fe0f2a34fedc7374be691443396d1f Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Mon, 25 Feb 2019 19:06:06 -0500
Subject: [PATCH 26/44] netlabel: fix out-of-bounds memory accesses

There are two array out-of-bounds memory accesses, one in
cipso_v4_map_lvl_valid(), the other in netlbl_bitmap_walk().  Both
errors are embarassingly simple, and the fixes are straightforward.

As a FYI for anyone backporting this patch to kernels prior to v4.8,
you'll want to apply the netlbl_bitmap_walk() patch to
cipso_v4_bitmap_walk() as netlbl_bitmap_walk() doesn't exist before
Linux v4.8.

Reported-by: Jann Horn <jannh@google.com>
Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine")
Fixes: 3faa8f982f95 ("netlabel: Move bitmap manipulation functions to the NetLabel core.")
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c        | 3 ++-
 net/netlabel/netlabel_kapi.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index eff86a71c1b0..f0165c5f376b 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
 	case CIPSO_V4_MAP_PASS:
 		return 0;
 	case CIPSO_V4_MAP_TRANS:
-		if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+		if ((level < doi_def->map.std->lvl.cipso_size) &&
+		    (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL))
 			return 0;
 		break;
 	}
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index ea7c67050792..ee3e5b6471a6 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -903,7 +903,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
 		    (state == 0 && (byte & bitmask) == 0))
 			return bit_spot;
 
-		bit_spot++;
+		if (++bit_spot >= bitmap_len)
+			return -1;
 		bitmask >>= 1;
 		if (bitmask == 0) {
 			byte = bitmap[++byte_offset];

From 5845f706388a4cde0f6b80f9e5d33527e942b7d9 Mon Sep 17 00:00:00 2001
From: Sheng Lan <lansheng@huawei.com>
Date: Thu, 28 Feb 2019 18:47:58 +0800
Subject: [PATCH 27/44] net: netem: fix skb length BUG_ON in __skb_to_sgvec

It can be reproduced by following steps:
1. virtio_net NIC is configured with gso/tso on
2. configure nginx as http server with an index file bigger than 1M bytes
3. use tc netem to produce duplicate packets and delay:
   tc qdisc add dev eth0 root netem delay 100ms 10ms 30% duplicate 90%
4. continually curl the nginx http server to get index file on client
5. BUG_ON is seen quickly

[10258690.371129] kernel BUG at net/core/skbuff.c:4028!
[10258690.371748] invalid opcode: 0000 [#1] SMP PTI
[10258690.372094] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G        W         5.0.0-rc6 #2
[10258690.372094] RSP: 0018:ffffa05797b43da0 EFLAGS: 00010202
[10258690.372094] RBP: 00000000000005ea R08: 0000000000000000 R09: 00000000000005ea
[10258690.372094] R10: ffffa0579334d800 R11: 00000000000002c0 R12: 0000000000000002
[10258690.372094] R13: 0000000000000000 R14: ffffa05793122900 R15: ffffa0578f7cb028
[10258690.372094] FS:  0000000000000000(0000) GS:ffffa05797b40000(0000) knlGS:0000000000000000
[10258690.372094] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[10258690.372094] CR2: 00007f1a6dc00868 CR3: 000000001000e000 CR4: 00000000000006e0
[10258690.372094] Call Trace:
[10258690.372094]  <IRQ>
[10258690.372094]  skb_to_sgvec+0x11/0x40
[10258690.372094]  start_xmit+0x38c/0x520 [virtio_net]
[10258690.372094]  dev_hard_start_xmit+0x9b/0x200
[10258690.372094]  sch_direct_xmit+0xff/0x260
[10258690.372094]  __qdisc_run+0x15e/0x4e0
[10258690.372094]  net_tx_action+0x137/0x210
[10258690.372094]  __do_softirq+0xd6/0x2a9
[10258690.372094]  irq_exit+0xde/0xf0
[10258690.372094]  smp_apic_timer_interrupt+0x74/0x140
[10258690.372094]  apic_timer_interrupt+0xf/0x20
[10258690.372094]  </IRQ>

In __skb_to_sgvec(), the skb->len is not equal to the sum of the skb's
linear data size and nonlinear data size, thus BUG_ON triggered.
Because the skb is cloned and a part of nonlinear data is split off.

Duplicate packet is cloned in netem_enqueue() and may be delayed
some time in qdisc. When qdisc len reached the limit and returns
NET_XMIT_DROP, the skb will be retransmit later in write queue.
the skb will be fragmented by tso_fragment(), the limit size
that depends on cwnd and mss decrease, the skb's nonlinear
data will be split off. The length of the skb cloned by netem
will not be updated. When we use virtio_net NIC and invoke skb_to_sgvec(),
the BUG_ON trigger.

To fix it, netem returns NET_XMIT_SUCCESS to upper stack
when it clones a duplicate packet.

Fixes: 35d889d1 ("sch_netem: fix skb leak in netem_enqueue()")
Signed-off-by: Sheng Lan <lansheng@huawei.com>
Reported-by: Qin Ji <jiqin.ji@huawei.com>
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 75046ec72144..cc9d8133afcd 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -447,6 +447,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	int nb = 0;
 	int count = 1;
 	int rc = NET_XMIT_SUCCESS;
+	int rc_drop = NET_XMIT_DROP;
 
 	/* Do not fool qdisc_drop_all() */
 	skb->prev = NULL;
@@ -486,6 +487,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		q->duplicate = 0;
 		rootq->enqueue(skb2, rootq, to_free);
 		q->duplicate = dupsave;
+		rc_drop = NET_XMIT_SUCCESS;
 	}
 
 	/*
@@ -498,7 +500,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		if (skb_is_gso(skb)) {
 			segs = netem_segment(skb, sch, to_free);
 			if (!segs)
-				return NET_XMIT_DROP;
+				return rc_drop;
 		} else {
 			segs = skb;
 		}
@@ -521,8 +523,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			1<<(prandom_u32() % 8);
 	}
 
-	if (unlikely(sch->q.qlen >= sch->limit))
-		return qdisc_drop_all(skb, sch, to_free);
+	if (unlikely(sch->q.qlen >= sch->limit)) {
+		qdisc_drop_all(skb, sch, to_free);
+		return rc_drop;
+	}
 
 	qdisc_qstats_backlog_inc(sch, skb);
 

From ac5105052dc8be5cef34d79e1f4186d39b2f3ca3 Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Thu, 28 Feb 2019 11:36:52 +0000
Subject: [PATCH 28/44] sctp: chunk.c: correct format string for size_t in
 printk

According to Documentation/core-api/printk-formats.rst, size_t should be
printed with %zu, rather than %Zu.

In addition, using %Zu triggers a warning on clang (-Wformat-extra-args):

net/sctp/chunk.c:196:25: warning: data argument not used by format string [-Wformat-extra-args]
                                    __func__, asoc, max_data);
                                    ~~~~~~~~~~~~~~~~^~~~~~~~~
./include/linux/printk.h:440:49: note: expanded from macro 'pr_warn_ratelimited'
        printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~
./include/linux/printk.h:424:17: note: expanded from macro 'printk_ratelimited'
                printk(fmt, ##__VA_ARGS__);                             \
                       ~~~    ^

Fixes: 5b5e0928f742 ("lib/vsprintf.c: remove %Z support")
Link: https://github.com/ClangBuiltLinux/linux/issues/378
Signed-off-by: Matthias Maennich <maennich@google.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/chunk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 64bef313d436..5cb7c1ff97e9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -192,7 +192,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	if (unlikely(!max_data)) {
 		max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk),
 					       sctp_datachk_len(&asoc->stream));
-		pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%Zu)",
+		pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%zu)",
 				    __func__, asoc, max_data);
 	}
 

From 99e87f56b48f490fb16b6e0f74691c1e664dea95 Mon Sep 17 00:00:00 2001
From: Igor Druzhinin <igor.druzhinin@citrix.com>
Date: Thu, 28 Feb 2019 12:48:03 +0000
Subject: [PATCH 29/44] xen-netback: fix occasional leak of grant ref mappings
 under memory pressure

Zero-copy callback flag is not yet set on frag list skb at the moment
xenvif_handle_frag_list() returns -ENOMEM. This eventually results in
leaking grant ref mappings since xenvif_zerocopy_callback() is never
called for these fragments. Those eventually build up and cause Xen
to kill Dom0 as the slots get reused for new mappings:

"d0v0 Attempt to implicitly unmap a granted PTE c010000329fce005"

That behavior is observed under certain workloads where sudden spikes
of page cache writes coexist with active atomic skb allocations from
network traffic. Additionally, rework the logic to deal with frag_list
deallocation in a single place.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 80aae3a32c2a..f09948b009dd 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1072,11 +1072,6 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
 		skb_frag_size_set(&frags[i], len);
 	}
 
-	/* Copied all the bits from the frag list -- free it. */
-	skb_frag_list_init(skb);
-	xenvif_skb_zerocopy_prepare(queue, nskb);
-	kfree_skb(nskb);
-
 	/* Release all the original (foreign) frags. */
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
 		skb_frag_unref(skb, f);
@@ -1145,6 +1140,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
 		xenvif_fill_frags(queue, skb);
 
 		if (unlikely(skb_has_frag_list(skb))) {
+			struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
+			xenvif_skb_zerocopy_prepare(queue, nskb);
 			if (xenvif_handle_frag_list(queue, skb)) {
 				if (net_ratelimit())
 					netdev_err(queue->vif->dev,
@@ -1153,6 +1150,9 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
 				kfree_skb(skb);
 				continue;
 			}
+			/* Copied all the bits from the frag list -- free it. */
+			skb_frag_list_init(skb);
+			kfree_skb(nskb);
 		}
 
 		skb->dev      = queue->vif->dev;

From a2288d4e355992d369c50c45d017a85f6061ff71 Mon Sep 17 00:00:00 2001
From: Igor Druzhinin <igor.druzhinin@citrix.com>
Date: Thu, 28 Feb 2019 14:11:26 +0000
Subject: [PATCH 30/44] xen-netback: don't populate the hash cache on XenBus
 disconnect

Occasionally, during the disconnection procedure on XenBus which
includes hash cache deinitialization there might be some packets
still in-flight on other processors. Handling of these packets includes
hashing and hash cache population that finally results in hash cache
data structure corruption.

In order to avoid this we prevent hashing of those packets if there
are no queues initialized. In that case RCU protection of queues guards
the hash cache as well.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/hash.c      | 2 ++
 drivers/net/xen-netback/interface.c | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c
index 0ccb021f1e78..10d580c3dea3 100644
--- a/drivers/net/xen-netback/hash.c
+++ b/drivers/net/xen-netback/hash.c
@@ -454,6 +454,8 @@ void xenvif_init_hash(struct xenvif *vif)
 	if (xenvif_hash_cache_size == 0)
 		return;
 
+	BUG_ON(vif->hash.cache.count);
+
 	spin_lock_init(&vif->hash.cache.lock);
 	INIT_LIST_HEAD(&vif->hash.cache.list);
 }
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 182d6770f102..6da12518e693 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -153,6 +153,13 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
 {
 	struct xenvif *vif = netdev_priv(dev);
 	unsigned int size = vif->hash.size;
+	unsigned int num_queues;
+
+	/* If queues are not set up internally - always return 0
+	 * as the packet going to be dropped anyway */
+	num_queues = READ_ONCE(vif->num_queues);
+	if (num_queues < 1)
+		return 0;
 
 	if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
 		return fallback(dev, skb, NULL) % dev->real_num_tx_queues;

From 6e46e2d821bb22b285ae8187959096b65d063b0d Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 28 Feb 2019 18:14:03 +0100
Subject: [PATCH 31/44] net: dsa: mv88e6xxx: Fix u64 statistics

The switch maintains u64 counters for the number of octets sent and
received. These are kept as two u32's which need to be combined.  Fix
the combing, which wrongly worked on u16's.

Fixes: 80c4627b2719 ("dsa: mv88x6xxx: Refactor getting a single statistic")
Reported-by: Chris Healy <Chris.Healy@zii.aero>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 12fd7ce3f1ff..9d36d2b5fbc6 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -896,7 +896,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 	default:
 		return U64_MAX;
 	}
-	value = (((u64)high) << 16) | low;
+	value = (((u64)high) << 32) | low;
 	return value;
 }
 

From d235c48b40d399328585a68f3f9bf7cc3062d586 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Thu, 28 Feb 2019 22:14:33 +0100
Subject: [PATCH 32/44] net: dsa: mv88e6xxx: power serdes on/off for 10G
 interfaces on 6390X

Upon setting the cmode on 6390 and 6390X, the associated serdes
interfaces must be powered off/on.

Both 6390X and 6390 share code to do so, but it currently uses the 6390
specific helper mv88e6390_serdes_power() to disable and enable the
serdes interface.

This call will fail silently on 6390X when trying so set a 10G interface
such as XAUI or RXAUI, since mv88e6390_serdes_power() internally grabs
the lane number based on modes supported by the 6390, and returns 0 when
getting -ENODEV as a lane number.

Using mv88e6390x_serdes_power() should be safe here, since we explicitly
rule-out all ports but the 9 and 10, and because modes supported by 6390
ports 9 and 10 are a subset of those supported on 6390X.

This was tested on 6390X using RXAUI mode.

Fixes: 364e9d7776a3 ("net: dsa: mv88e6xxx: Power on/off SERDES on cmode change")
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/port.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index ebd26b6a93e6..4b02c96f6786 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -408,7 +408,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 			return err;
 	}
 
-	err = mv88e6390_serdes_power(chip, port, false);
+	err = mv88e6390x_serdes_power(chip, port, false);
 	if (err)
 		return err;
 
@@ -424,7 +424,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		if (err)
 			return err;
 
-		err = mv88e6390_serdes_power(chip, port, true);
+		err = mv88e6390x_serdes_power(chip, port, true);
 		if (err)
 			return err;
 

From 352d20d611414715353ee65fc206ee57ab1a6984 Mon Sep 17 00:00:00 2001
From: Peng Sun <sironhide0null@gmail.com>
Date: Wed, 27 Feb 2019 22:36:25 +0800
Subject: [PATCH 33/44] bpf: drop refcount if bpf_map_new_fd() fails in
 map_create()

In bpf/syscall.c, map_create() first set map->usercnt to 1, a file
descriptor is supposed to return to userspace. When bpf_map_new_fd()
fails, drop the refcount.

Fixes: bd5f5f4ecb78 ("bpf: Add BPF_MAP_GET_FD_BY_ID")
Signed-off-by: Peng Sun <sironhide0null@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f98328abe8fa..84470d1480aa 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -559,12 +559,12 @@ static int map_create(union bpf_attr *attr)
 	err = bpf_map_new_fd(map, f_flags);
 	if (err < 0) {
 		/* failed to allocate fd.
-		 * bpf_map_put() is needed because the above
+		 * bpf_map_put_with_uref() is needed because the above
 		 * bpf_map_alloc_id() has published the map
 		 * to the userspace and the userspace may
 		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
 		 */
-		bpf_map_put(map);
+		bpf_map_put_with_uref(map);
 		return err;
 	}
 

From ada641ff6ed34a125fbf62ec79733352ffd4305d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 26 Feb 2019 15:27:43 +0100
Subject: [PATCH 34/44] selftests: fixes for UDP GRO

The current implementation for UDP GRO tests is racy: the receiver
may flush the RX queue while the sending is still transmitting and
incorrectly report RX errors, with a wrong number of packet received.

Add explicit timeouts to the receiver for both connection activation
(first packet received for UDP) and reception completion, so that
in the above critical scenario the receiver will wait for the
transfer completion.

Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgro.sh         |  8 ++--
 tools/testing/selftests/net/udpgso_bench_rx.c | 42 +++++++++++++------
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index aeac53a99aeb..ac2a30be9b32 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -37,7 +37,7 @@ run_one() {
 
 	cfg_veth
 
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \
 		echo "ok" || \
 		echo "failed" &
 
@@ -81,7 +81,7 @@ run_one_nat() {
 	# will land on the 'plain' one
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
 	pid=$!
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${family} -b ${addr2%/*} ${rx_args} && \
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \
 		echo "ok" || \
 		echo "failed"&
 
@@ -99,8 +99,8 @@ run_one_2sock() {
 
 	cfg_veth
 
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -p 12345 &
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \
 		echo "ok" || \
 		echo "failed" &
 
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 0c960f673324..db3d4a8b5a4c 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -45,6 +45,8 @@ static int  cfg_alen 		= sizeof(struct sockaddr_in6);
 static int  cfg_expected_pkt_nr;
 static int  cfg_expected_pkt_len;
 static int  cfg_expected_gso_size;
+static int  cfg_connect_timeout_ms;
+static int  cfg_rcv_timeout_ms;
 static struct sockaddr_storage cfg_bind_addr;
 
 static bool interrupted;
@@ -87,7 +89,7 @@ static unsigned long gettimeofday_ms(void)
 	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
 }
 
-static void do_poll(int fd)
+static void do_poll(int fd, int timeout_ms)
 {
 	struct pollfd pfd;
 	int ret;
@@ -102,8 +104,16 @@ static void do_poll(int fd)
 			break;
 		if (ret == -1)
 			error(1, errno, "poll");
-		if (ret == 0)
-			continue;
+		if (ret == 0) {
+			if (!timeout_ms)
+				continue;
+
+			timeout_ms -= 10;
+			if (timeout_ms <= 0) {
+				interrupted = true;
+				break;
+			}
+		}
 		if (pfd.revents != POLLIN)
 			error(1, errno, "poll: 0x%x expected 0x%x\n",
 					pfd.revents, POLLIN);
@@ -134,7 +144,7 @@ static int do_socket(bool do_tcp)
 		if (listen(accept_fd, 1))
 			error(1, errno, "listen");
 
-		do_poll(accept_fd);
+		do_poll(accept_fd, cfg_connect_timeout_ms);
 		if (interrupted)
 			exit(0);
 
@@ -273,7 +283,9 @@ static void do_flush_udp(int fd)
 
 static void usage(const char *filepath)
 {
-	error(1, 0, "Usage: %s [-Grtv] [-b addr] [-p port] [-l pktlen] [-n packetnr] [-S gsosize]", filepath);
+	error(1, 0, "Usage: %s [-C connect_timeout] [-Grtv] [-b addr] [-p port]"
+	      " [-l pktlen] [-n packetnr] [-R rcv_timeout] [-S gsosize]",
+	      filepath);
 }
 
 static void parse_opts(int argc, char **argv)
@@ -282,7 +294,7 @@ static void parse_opts(int argc, char **argv)
 
 	/* bind to any by default */
 	setup_sockaddr(PF_INET6, "::", &cfg_bind_addr);
-	while ((c = getopt(argc, argv, "4b:Gl:n:p:rS:tv")) != -1) {
+	while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) {
 		switch (c) {
 		case '4':
 			cfg_family = PF_INET;
@@ -292,6 +304,9 @@ static void parse_opts(int argc, char **argv)
 		case 'b':
 			setup_sockaddr(cfg_family, optarg, &cfg_bind_addr);
 			break;
+		case 'C':
+			cfg_connect_timeout_ms = strtoul(optarg, NULL, 0);
+			break;
 		case 'G':
 			cfg_gro_segment = true;
 			break;
@@ -307,6 +322,9 @@ static void parse_opts(int argc, char **argv)
 		case 'r':
 			cfg_read_all = true;
 			break;
+		case 'R':
+			cfg_rcv_timeout_ms = strtoul(optarg, NULL, 0);
+			break;
 		case 'S':
 			cfg_expected_gso_size = strtol(optarg, NULL, 0);
 			break;
@@ -329,8 +347,9 @@ static void parse_opts(int argc, char **argv)
 
 static void do_recv(void)
 {
+	int timeout_ms = cfg_tcp ? cfg_rcv_timeout_ms : cfg_connect_timeout_ms;
 	unsigned long tnow, treport;
-	int fd, loop = 0;
+	int fd;
 
 	fd = do_socket(cfg_tcp);
 
@@ -342,12 +361,7 @@ static void do_recv(void)
 
 	treport = gettimeofday_ms() + 1000;
 	do {
-		/* force termination after the second poll(); this cope both
-		 * with sender slower than receiver and missing packet errors
-		 */
-		if (cfg_expected_pkt_nr && loop++)
-			interrupted = true;
-		do_poll(fd);
+		do_poll(fd, timeout_ms);
 
 		if (cfg_tcp)
 			do_flush_tcp(fd);
@@ -365,6 +379,8 @@ static void do_recv(void)
 			treport = tnow + 1000;
 		}
 
+		timeout_ms = cfg_rcv_timeout_ms;
+
 	} while (!interrupted);
 
 	if (cfg_expected_pkt_nr && (packets != cfg_expected_pkt_nr))

From 15f3ddf53d4d26c4e338c355abffb3eaf4b3112f Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Date: Tue, 26 Feb 2019 15:39:13 +0000
Subject: [PATCH 35/44] net: aquantia: regression on cpus with high cores: set
 mode with 8 queues

Recently the maximum number of queues was increased up to 8, but
NIC was not fully configured for 8 queues. In setups with more than 4 CPU
cores parts of TX traffic gets lost if the kernel routes it to queues 4th-8th.

This patch sets a tx hw traffic mode with 8 queues.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202651

Fixes: 71a963cfc50b ("net: aquantia: increase max number of hw queues")
Reported-by: Nicholas Johnson <nicholas.johnson@outlook.com.au>
Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c   |  3 +++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c  |  9 +++++++++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h  |  4 ++++
 .../aquantia/atlantic/hw_atl/hw_atl_llh_internal.h  | 13 +++++++++++++
 4 files changed, 29 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index b58ca7cb8e9d..fbba300c1d01 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -275,6 +275,9 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 {
+	/* Tx TC/Queue number config */
+	hw_atl_rpb_tps_tx_tc_mode_set(self, 1U);
+
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_last_pkt_set(self, 0x0F7FU);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 939f77e2e117..8ac7a67b15c1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1274,6 +1274,15 @@ void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en)
 			    HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en);
 }
 
+void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+				   u32 tx_traf_class_mode)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
+			HW_ATL_TPB_TX_TC_MODE_MSK,
+			HW_ATL_TPB_TX_TC_MODE_SHIFT,
+			tx_traf_class_mode);
+}
+
 void hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw,
 						u32 tx_buff_hi_threshold_per_tc,
 					 u32 buffer)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index 03c570d115fe..f529540bfd7e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -605,6 +605,10 @@ void hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(struct aq_hw_s *aq_hw,
 
 /* tpb */
 
+/* set TX Traffic Class Mode */
+void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+				   u32 tx_traf_class_mode);
+
 /* set tx buffer enable */
 void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 8470d92db812..e91ffce005f1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -1948,6 +1948,19 @@
 /* default value of bitfield tx_buf_en */
 #define HW_ATL_TPB_TX_BUF_EN_DEFAULT 0x0
 
+/* register address for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900
+/* bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100
+/* inverted bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF
+/* lower bit position of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8
+/* width of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1
+/* default value of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0
+
 /* tx tx{b}_hi_thresh[c:0] bitfield definitions
  * preprocessor definitions for the bitfield "tx{b}_hi_thresh[c:0]".
  * parameter: buffer {b} | stride size 0x10 | range [0, 7]

From d25ed413d5e51644e18f66e34eec049f17a7abcb Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 26 Feb 2019 19:29:22 +0100
Subject: [PATCH 36/44] net: phy: phylink: fix uninitialized variable in
 phylink_get_mac_state

When debugging an issue I found implausible values in state->pause.
Reason in that state->pause isn't initialized and later only single
bits are changed. Also the struct itself isn't initialized in
phylink_resolve(). So better initialize state->pause and other
not yet initialized fields.

v2:
- use right function name in subject
v3:
- initialize additional fields

Fixes: 9525ae83959b ("phylink: add phylink infrastructure")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 938803237d7f..85987aac31c4 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -320,6 +320,10 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *
 	linkmode_zero(state->lp_advertising);
 	state->interface = pl->link_config.interface;
 	state->an_enabled = pl->link_config.an_enabled;
+	state->speed = SPEED_UNKNOWN;
+	state->duplex = DUPLEX_UNKNOWN;
+	state->pause = MLO_PAUSE_NONE;
+	state->an_complete = 0;
 	state->link = 1;
 
 	return pl->ops->mac_link_state(ndev, state);

From 90490ef7269906423a1c1b917fc24be8b1602658 Mon Sep 17 00:00:00 2001
From: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Date: Tue, 26 Feb 2019 14:06:26 -0500
Subject: [PATCH 37/44] lan743x: Fix TX Stall Issue

It has been observed that tx queue stalls while downloading
from certain web sites (example www.speedtest.net)

The cause has been tracked down to a corner case where
dma descriptors where not setup properly. And there for a tx
completion interrupt was not signaled.

This fix corrects the problem by properly marking the end of
a multi descriptor transmission.

Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 310807ef328b..4d1b4a24907f 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1400,7 +1400,8 @@ static int lan743x_tx_frame_start(struct lan743x_tx *tx,
 }
 
 static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
-				     unsigned int frame_length)
+				     unsigned int frame_length,
+				     int nr_frags)
 {
 	/* called only from within lan743x_tx_xmit_frame.
 	 * assuming tx->ring_lock has already been acquired.
@@ -1410,6 +1411,10 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
 
 	/* wrap up previous descriptor */
 	tx->frame_data0 |= TX_DESC_DATA0_EXT_;
+	if (nr_frags <= 0) {
+		tx->frame_data0 |= TX_DESC_DATA0_LS_;
+		tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+	}
 	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
 	tx_descriptor->data0 = tx->frame_data0;
 
@@ -1514,8 +1519,11 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx,
 	u32 tx_tail_flags = 0;
 
 	/* wrap up previous descriptor */
-	tx->frame_data0 |= TX_DESC_DATA0_LS_;
-	tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+	if ((tx->frame_data0 & TX_DESC_DATA0_DTYPE_MASK_) ==
+	    TX_DESC_DATA0_DTYPE_DATA_) {
+		tx->frame_data0 |= TX_DESC_DATA0_LS_;
+		tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+	}
 
 	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
 	buffer_info = &tx->buffer_info[tx->frame_tail];
@@ -1600,7 +1608,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
 	}
 
 	if (gso)
-		lan743x_tx_frame_add_lso(tx, frame_length);
+		lan743x_tx_frame_add_lso(tx, frame_length, nr_frags);
 
 	if (nr_frags <= 0)
 		goto finish;

From d1a2930d8a992fb6ac2529449f81a0056e1b98d1 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Fri, 1 Mar 2019 22:58:09 +0000
Subject: [PATCH 38/44] MIPS: eBPF: Fix icache flush end address

The MIPS eBPF JIT calls flush_icache_range() in order to ensure the
icache observes the code that we just wrote. Unfortunately it gets the
end address calculation wrong due to some bad pointer arithmetic.

The struct jit_ctx target field is of type pointer to u32, and as such
adding one to it will increment the address being pointed to by 4 bytes.
Therefore in order to find the address of the end of the code we simply
need to add the number of 4 byte instructions emitted, but we mistakenly
add the number of instructions multiplied by 4. This results in the call
to flush_icache_range() operating on a memory region 4x larger than
intended, which is always wasteful and can cause crashes if we overrun
into an unmapped page.

Fix this by correcting the pointer arithmetic to remove the bogus
multiplication, and use braces to remove the need for a set of brackets
whilst also making it obvious that the target field is a pointer.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.")
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: stable@vger.kernel.org # v4.13+
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/mips/net/ebpf_jit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 76e9bf88d3b9..0effd3cba9a7 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1819,7 +1819,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	/* Update the icache */
 	flush_icache_range((unsigned long)ctx.target,
-			   (unsigned long)(ctx.target + ctx.idx * sizeof(u32)));
+			   (unsigned long)&ctx.target[ctx.idx]);
 
 	if (bpf_jit_enable > 1)
 		/* Dump JIT code */

From 5e1a99eae84999a2536f50a0beaf5d5262337f40 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 27 Feb 2019 16:15:29 +0800
Subject: [PATCH 39/44] ipv4: Add ICMPv6 support when parse route ipproto

For ip rules, we need to use 'ipproto ipv6-icmp' to match ICMPv6 headers.
But for ip -6 route, currently we only support tcp, udp and icmp.

Add ICMPv6 support so we can match ipv6-icmp rules for route lookup.

v2: As David Ahern and Sabrina Dubroca suggested, Add an argument to
rtm_getroute_parse_ip_proto() to handle ICMP/ICMPv6 with different family.

Reported-by: Jianlin Shi <jishi@redhat.com>
Fixes: eacb9384a3fe ("ipv6: support sport, dport and ip_proto in RTM_GETROUTE")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h   |  2 +-
 net/ipv4/netlink.c | 19 ++++++++++++++-----
 net/ipv4/route.c   |  2 +-
 net/ipv6/route.c   |  3 ++-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index f0e8d064e249..be3cad9c2e4c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -718,7 +718,7 @@ extern int sysctl_icmp_msgs_burst;
 int ip_misc_proc_init(void);
 #endif
 
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
 				struct netlink_ext_ack *extack);
 
 #endif	/* _IP_H */
diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c
index f86bb4f06609..d8e3a1fb8e82 100644
--- a/net/ipv4/netlink.c
+++ b/net/ipv4/netlink.c
@@ -3,9 +3,10 @@
 #include <linux/types.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
+#include <linux/in6.h>
 #include <net/ip.h>
 
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
 				struct netlink_ext_ack *extack)
 {
 	*ip_proto = nla_get_u8(attr);
@@ -13,11 +14,19 @@ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
 	switch (*ip_proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
-	case IPPROTO_ICMP:
 		return 0;
-	default:
-		NL_SET_ERR_MSG(extack, "Unsupported ip proto");
-		return -EOPNOTSUPP;
+	case IPPROTO_ICMP:
+		if (family != AF_INET)
+			break;
+		return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6:
+		if (family != AF_INET6)
+			break;
+		return 0;
+#endif
 	}
+	NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5163b64f8fb3..7bb9128c8363 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2803,7 +2803,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	if (tb[RTA_IP_PROTO]) {
 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
-						  &ip_proto, extack);
+						  &ip_proto, AF_INET, extack);
 		if (err)
 			return err;
 	}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b7a620023a52..8dad1d690b78 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4893,7 +4893,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	if (tb[RTA_IP_PROTO]) {
 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
-						  &fl6.flowi6_proto, extack);
+						  &fl6.flowi6_proto, AF_INET6,
+						  extack);
 		if (err)
 			goto errout;
 	}

From 3612af783cf52c74a031a2f11b82247b2599d3cd Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 1 Mar 2019 22:05:29 +0100
Subject: [PATCH 40/44] bpf: fix sanitation rewrite in case of non-pointers

Marek reported that he saw an issue with the below snippet in that
timing measurements where off when loaded as unpriv while results
were reasonable when loaded as privileged:

    [...]
    uint64_t a = bpf_ktime_get_ns();
    uint64_t b = bpf_ktime_get_ns();
    uint64_t delta = b - a;
    if ((int64_t)delta > 0) {
    [...]

Turns out there is a bug where a corner case is missing in the fix
d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar
type from different paths"), namely fixup_bpf_calls() only checks
whether aux has a non-zero alu_state, but it also needs to test for
the case of BPF_ALU_NON_POINTER since in both occasions we need to
skip the masking rewrite (as there is nothing to mask).

Fixes: d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar type from different paths")
Reported-by: Marek Majkowski <marek@cloudflare.com>
Reported-by: Arthur Fabre <afabre@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/netdev/CAJPywTJqP34cK20iLM5YmUMz9KXQOdu1-+BZrGMAGgLuBWz7fg@mail.gmail.com/T/
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8f295b790297..5fcce2f4209d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6920,7 +6920,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			u32 off_reg;
 
 			aux = &env->insn_aux_data[i + delta];
-			if (!aux->alu_state)
+			if (!aux->alu_state ||
+			    aux->alu_state == BPF_ALU_NON_POINTER)
 				continue;
 
 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;

From ed8fe20205ac054bf585156709de3913d1890f30 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 28 Feb 2019 07:39:15 +0100
Subject: [PATCH 41/44] net: dsa: mv88e6xxx: prevent interrupt storm caused by
 mv88e6390x_port_set_cmode

When debugging another issue I faced an interrupt storm in this
driver (88E6390, port 9 in SGMII mode), consisting of alternating
link-up / link-down interrupts. Analysis showed that the driver
wanted to set a cmode that was set already. But so far
mv88e6390x_port_set_cmode() doesn't check this and powers down
SERDES, what causes the link to break, and eventually results in
the described interrupt storm.

Fix this by checking whether the cmode actually changes. We want
that the very first call to mv88e6390x_port_set_cmode() always
configures the registers, therefore initialize port.cmode with
a value that is different from any supported cmode value.
We have to take care that we only init the ports cmode once
chip->info->num_ports is set.

v2:
- add small helper and init the number of actual ports only

Fixes: 364e9d7776a3 ("net: dsa: mv88e6xxx: Power on/off SERDES on cmode change")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 10 ++++++++++
 drivers/net/dsa/mv88e6xxx/port.c |  4 ++++
 drivers/net/dsa/mv88e6xxx/port.h |  1 +
 3 files changed, 15 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9d36d2b5fbc6..f5cad42875e9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -4595,6 +4595,14 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
 	return 0;
 }
 
+static void mv88e6xxx_ports_cmode_init(struct mv88e6xxx_chip *chip)
+{
+	int i;
+
+	for (i = 0; i < mv88e6xxx_num_ports(chip); i++)
+		chip->ports[i].cmode = MV88E6XXX_PORT_STS_CMODE_INVALID;
+}
+
 static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds,
 							int port)
 {
@@ -4631,6 +4639,8 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev,
 	if (err)
 		goto free;
 
+	mv88e6xxx_ports_cmode_init(chip);
+
 	mutex_lock(&chip->reg_lock);
 	err = mv88e6xxx_switch_reset(chip);
 	mutex_unlock(&chip->reg_lock);
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 4b02c96f6786..79ab51e69aee 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -398,6 +398,10 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		cmode = 0;
 	}
 
+	/* cmode doesn't change, nothing to do for us */
+	if (cmode == chip->ports[port].cmode)
+		return 0;
+
 	lane = mv88e6390x_serdes_get_lane(chip, port);
 	if (lane < 0)
 		return lane;
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index e583641de758..4aadf321edb7 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -52,6 +52,7 @@
 #define MV88E6185_PORT_STS_CMODE_1000BASE_X	0x0005
 #define MV88E6185_PORT_STS_CMODE_PHY		0x0006
 #define MV88E6185_PORT_STS_CMODE_DISABLED	0x0007
+#define MV88E6XXX_PORT_STS_CMODE_INVALID	0xff
 
 /* Offset 0x01: MAC (or PCS or Physical) Control Register */
 #define MV88E6XXX_PORT_MAC_CTL				0x01

From cf1c9ccba7308e48a68fa77f476287d9d614e4c7 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 28 Feb 2019 14:56:04 +0100
Subject: [PATCH 42/44] geneve: correctly handle ipv6.disable module parameter

When IPv6 is compiled but disabled at runtime, geneve_sock_add returns
-EAFNOSUPPORT. For metadata based tunnels, this causes failure of the whole
operation of bringing up the tunnel.

Ignore failure of IPv6 socket creation for metadata based tunnels caused by
IPv6 not being available.

This is the same fix as what commit d074bf960044 ("vxlan: correctly handle
ipv6.disable module parameter") is doing for vxlan.

Note there's also commit c0a47e44c098 ("geneve: should not call rt6_lookup()
when ipv6 was disabled") which fixes a similar issue but for regular
tunnels, while this patch is needed for metadata based tunnels.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 3377ac66a347..5583d993480d 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -692,15 +692,20 @@ out:
 static int geneve_open(struct net_device *dev)
 {
 	struct geneve_dev *geneve = netdev_priv(dev);
-	bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6);
 	bool metadata = geneve->collect_md;
+	bool ipv4, ipv6;
 	int ret = 0;
 
+	ipv6 = geneve->info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
+	ipv4 = !ipv6 || metadata;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (ipv6 || metadata)
+	if (ipv6) {
 		ret = geneve_sock_add(geneve, true);
+		if (ret < 0 && ret != -EAFNOSUPPORT)
+			ipv4 = false;
+	}
 #endif
-	if (!ret && (!ipv6 || metadata))
+	if (ipv4)
 		ret = geneve_sock_add(geneve, false);
 	if (ret < 0)
 		geneve_sock_release(geneve);

From a6da21bb0eae459a375d5bd48baed821d14301d0 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 1 Mar 2019 23:43:39 +0100
Subject: [PATCH 43/44] net: dsa: mv88e6xxx: Fix statistics on mv88e6161

Despite what the datesheet says, the silicon implements the older way
of snapshoting the statistics. Change the op.

Reported-by: Chris.Healy@zii.aero
Tested-by: Chris.Healy@zii.aero
Fixes: 0ac64c394900 ("net: dsa: mv88e6xxx: mv88e6161 uses mv88e6320 stats snapshot")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index f5cad42875e9..7e3c00bd9532 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3093,7 +3093,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
 	.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
 	.port_link_state = mv88e6352_port_link_state,
 	.port_get_cmode = mv88e6185_port_get_cmode,
-	.stats_snapshot = mv88e6320_g1_stats_snapshot,
+	.stats_snapshot = mv88e6xxx_g1_stats_snapshot,
 	.stats_set_histogram = mv88e6095_g1_stats_set_histogram,
 	.stats_get_sset_count = mv88e6095_stats_get_sset_count,
 	.stats_get_strings = mv88e6095_stats_get_strings,

From 07f12b26e21ab359261bf75cfcb424fdc7daeb6d Mon Sep 17 00:00:00 2001
From: Mao Wenan <maowenan@huawei.com>
Date: Fri, 1 Mar 2019 23:06:40 +0800
Subject: [PATCH 44/44] net: sit: fix memory leak in sit_init_net()

If register_netdev() is failed to register sitn->fb_tunnel_dev,
it will go to err_reg_dev and forget to free netdev(sitn->fb_tunnel_dev).

BUG: memory leak
unreferenced object 0xffff888378daad00 (size 512):
  comm "syz-executor.1", pid 4006, jiffies 4295121142 (age 16.115s)
  hex dump (first 32 bytes):
    00 e6 ed c0 83 88 ff ff 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
backtrace:
    [<00000000d6dcb63e>] kvmalloc include/linux/mm.h:577 [inline]
    [<00000000d6dcb63e>] kvzalloc include/linux/mm.h:585 [inline]
    [<00000000d6dcb63e>] netif_alloc_netdev_queues net/core/dev.c:8380 [inline]
    [<00000000d6dcb63e>] alloc_netdev_mqs+0x600/0xcc0 net/core/dev.c:8970
    [<00000000867e172f>] sit_init_net+0x295/0xa40 net/ipv6/sit.c:1848
    [<00000000871019fa>] ops_init+0xad/0x3e0 net/core/net_namespace.c:129
    [<00000000319507f6>] setup_net+0x2ba/0x690 net/core/net_namespace.c:314
    [<0000000087db4f96>] copy_net_ns+0x1dc/0x330 net/core/net_namespace.c:437
    [<0000000057efc651>] create_new_namespaces+0x382/0x730 kernel/nsproxy.c:107
    [<00000000676f83de>] copy_namespaces+0x2ed/0x3d0 kernel/nsproxy.c:165
    [<0000000030b74bac>] copy_process.part.27+0x231e/0x6db0 kernel/fork.c:1919
    [<00000000fff78746>] copy_process kernel/fork.c:1713 [inline]
    [<00000000fff78746>] _do_fork+0x1bc/0xe90 kernel/fork.c:2224
    [<000000001c2e0d1c>] do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290
    [<00000000ec48bd44>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
    [<0000000039acff8a>] 0xffffffffffffffff

Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e8a1dabef803..09e440e8dfae 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1873,6 +1873,7 @@ static int __net_init sit_init_net(struct net *net)
 
 err_reg_dev:
 	ipip6_dev_free(sitn->fb_tunnel_dev);
+	free_netdev(sitn->fb_tunnel_dev);
 err_alloc_dev:
 	return err;
 }