From 4288ea006c73e37c2a4f60dfaef20dd167b8df31 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:33:21 +0100
Subject: [PATCH 01/26] bpf: harden flags check in cgroup_storage_update_elem()

cgroup_storage_update_elem() shouldn't accept any flags
argument values except BPF_ANY and BPF_EXIST to guarantee
the backward compatibility, had a new flag value been added.

Fixes: de9cbbaadba5 ("bpf: introduce cgroup storage maps")
Signed-off-by: Roman Gushchin <guro@fb.com>
Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/local_storage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 22ad967d1e5f..94126cbffc88 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -129,7 +129,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	struct bpf_cgroup_storage *storage;
 	struct bpf_storage_buffer *new;
 
-	if (flags & BPF_NOEXIST)
+	if (flags != BPF_ANY && flags != BPF_EXIST)
 		return -EINVAL;
 
 	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,

From 848e616e66d4592fe9afc40743d3504deb7632b4 Mon Sep 17 00:00:00 2001
From: Stefan Seyfried <seife+kernel@b1-systems.com>
Date: Sun, 30 Sep 2018 12:53:00 +0200
Subject: [PATCH 02/26] cfg80211: fix wext-compat memory leak

cfg80211_wext_giwrate and sinfo.pertid might allocate sinfo.pertid via
rdev_get_station(), but never release it. Fix that.

Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info")
Signed-off-by: Stefan Seyfried <seife+kernel@b1-systems.com>
[johannes: fix error path, use cfg80211_sinfo_release_content(), add Fixes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/wext-compat.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 167f7025ac98..06943d9c9835 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1278,12 +1278,16 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
 	if (err)
 		return err;
 
-	if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)))
-		return -EOPNOTSUPP;
+	if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) {
+		err = -EOPNOTSUPP;
+		goto free;
+	}
 
 	rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate);
 
-	return 0;
+free:
+	cfg80211_sinfo_release_content(&sinfo);
+	return err;
 }
 
 /* Get wireless statistics.  Called by /proc/net/wireless and by SIOCGIWSTATS */
@@ -1293,7 +1297,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	/* we are under RTNL - globally locked - so can use static structs */
 	static struct iw_statistics wstats;
-	static struct station_info sinfo;
+	static struct station_info sinfo = {};
 	u8 bssid[ETH_ALEN];
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION)
@@ -1352,6 +1356,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 	if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))
 		wstats.discard.retries = sinfo.tx_failed;
 
+	cfg80211_sinfo_release_content(&sinfo);
+
 	return &wstats;
 }
 

From 211710ca74adf790b46ab3867fcce8047b573cd1 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 29 Sep 2018 16:01:58 +0200
Subject: [PATCH 03/26] mac80211: fix setting IEEE80211_KEY_FLAG_RX_MGMT for AP
 mode keys

key->sta is only valid after ieee80211_key_link, which is called later
in this function. Because of that, the IEEE80211_KEY_FLAG_RX_MGMT is
never set when management frame protection is enabled.

Fixes: e548c49e6dc6b ("mac80211: add key flag for management keys")
Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d25da0e66da1..5d22eda8a6b1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -427,7 +427,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 		/* Keys without a station are used for TX only */
-		if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP))
+		if (sta && test_sta_flag(sta, WLAN_STA_MFP))
 			key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
 		break;
 	case NL80211_IFTYPE_ADHOC:

From 1db58529454742f67ebd96e3588315e880b72837 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Sep 2018 17:05:04 -0600
Subject: [PATCH 04/26] cfg80211: fix use-after-free in reg_process_hint()

reg_process_hint_country_ie() can free regulatory_request and return
REG_REQ_ALREADY_SET. We shouldn't use regulatory_request after it's
called. KASAN error was observed when this happens.

BUG: KASAN: use-after-free in reg_process_hint+0x839/0x8aa [cfg80211]
Read of size 4 at addr ffff8800c430d434 by task kworker/1:3/89
<snipped>
Workqueue: events reg_todo [cfg80211]
Call Trace:
 dump_stack+0xc1/0x10c
 ? _atomic_dec_and_lock+0x1ad/0x1ad
 ? _raw_spin_lock_irqsave+0xa0/0xd2
 print_address_description+0x86/0x26f
 ? reg_process_hint+0x839/0x8aa [cfg80211]
 kasan_report+0x241/0x29b
 reg_process_hint+0x839/0x8aa [cfg80211]
 reg_todo+0x204/0x5b9 [cfg80211]
 process_one_work+0x55f/0x8d0
 ? worker_detach_from_pool+0x1b5/0x1b5
 ? _raw_spin_unlock_irq+0x65/0xdd
 ? _raw_spin_unlock_irqrestore+0xf3/0xf3
 worker_thread+0x5dd/0x841
 ? kthread_parkme+0x1d/0x1d
 kthread+0x270/0x285
 ? pr_cont_work+0xe3/0xe3
 ? rcu_read_unlock_sched_notrace+0xca/0xca
 ret_from_fork+0x22/0x40

Allocated by task 2718:
 set_track+0x63/0xfa
 __kmalloc+0x119/0x1ac
 regulatory_hint_country_ie+0x38/0x329 [cfg80211]
 __cfg80211_connect_result+0x854/0xadd [cfg80211]
 cfg80211_rx_assoc_resp+0x3bc/0x4f0 [cfg80211]
smsc95xx v1.0.6
 ieee80211_sta_rx_queued_mgmt+0x1803/0x7ed5 [mac80211]
 ieee80211_iface_work+0x411/0x696 [mac80211]
 process_one_work+0x55f/0x8d0
 worker_thread+0x5dd/0x841
 kthread+0x270/0x285
 ret_from_fork+0x22/0x40

Freed by task 89:
 set_track+0x63/0xfa
 kasan_slab_free+0x6a/0x87
 kfree+0xdc/0x470
 reg_process_hint+0x31e/0x8aa [cfg80211]
 reg_todo+0x204/0x5b9 [cfg80211]
 process_one_work+0x55f/0x8d0
 worker_thread+0x5dd/0x841
 kthread+0x270/0x285
 ret_from_fork+0x22/0x40
<snipped>

Signed-off-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 765dedb12361..24cfa2776f50 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2661,11 +2661,12 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 {
 	struct wiphy *wiphy = NULL;
 	enum reg_request_treatment treatment;
+	enum nl80211_reg_initiator initiator = reg_request->initiator;
 
 	if (reg_request->wiphy_idx != WIPHY_IDX_INVALID)
 		wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
 
-	switch (reg_request->initiator) {
+	switch (initiator) {
 	case NL80211_REGDOM_SET_BY_CORE:
 		treatment = reg_process_hint_core(reg_request);
 		break;
@@ -2683,7 +2684,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 		treatment = reg_process_hint_country_ie(wiphy, reg_request);
 		break;
 	default:
-		WARN(1, "invalid initiator %d\n", reg_request->initiator);
+		WARN(1, "invalid initiator %d\n", initiator);
 		goto out_free;
 	}
 
@@ -2698,7 +2699,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 	 */
 	if (treatment == REG_REQ_ALREADY_SET && wiphy &&
 	    wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
-		wiphy_update_regulatory(wiphy, reg_request->initiator);
+		wiphy_update_regulatory(wiphy, initiator);
 		wiphy_all_share_dfs_chan_state(wiphy);
 		reg_check_channels();
 	}

From b0584ea66d73919cbf5878a3420a837f06ab8396 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 2 Oct 2018 02:41:53 +0000
Subject: [PATCH 05/26] bpf: don't accept cgroup local storage with zero value
 size

Explicitly forbid creating cgroup local storage maps with zero value
size, as it makes no sense and might even cause a panic.

Reported-by: syzbot+18628320d3b14a5c459c@syzkaller.appspotmail.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/local_storage.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 94126cbffc88..830d7f095748 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -195,6 +195,9 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
 		return ERR_PTR(-EINVAL);
 
+	if (attr->value_size == 0)
+		return ERR_PTR(-EINVAL);
+
 	if (attr->value_size > PAGE_SIZE)
 		return ERR_PTR(-E2BIG);
 

From f3c84a8e3e922afdcbc55f04df8fdf8a548f5a21 Mon Sep 17 00:00:00 2001
From: Nir Dotan <nird@mellanox.com>
Date: Thu, 4 Oct 2018 15:48:02 +0000
Subject: [PATCH 06/26] mlxsw: pci: Derive event type from event queue number

Due to a hardware issue in Spectrum-2, the field event_type of the event
queue element (EQE) has become reserved. It was used to distinguish between
command interface completion events and completion events.

Use queue number to determine event type, as command interface completion
events are always received on EQ0 and mlxsw driver maps completion events
to EQ1.

Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC")
Signed-off-by: Nir Dotan <nird@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/pci.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 4d271fb3de3d..5890fdfd62c3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -718,14 +718,17 @@ static void mlxsw_pci_eq_tasklet(unsigned long data)
 	memset(&active_cqns, 0, sizeof(active_cqns));
 
 	while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) {
-		u8 event_type = mlxsw_pci_eqe_event_type_get(eqe);
 
-		switch (event_type) {
-		case MLXSW_PCI_EQE_EVENT_TYPE_CMD:
+		/* Command interface completion events are always received on
+		 * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events
+		 * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1).
+		 */
+		switch (q->num) {
+		case MLXSW_PCI_EQ_ASYNC_NUM:
 			mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe);
 			q->u.eq.ev_cmd_count++;
 			break;
-		case MLXSW_PCI_EQE_EVENT_TYPE_COMP:
+		case MLXSW_PCI_EQ_COMP_NUM:
 			cqn = mlxsw_pci_eqe_cqn_get(eqe);
 			set_bit(cqn, active_cqns);
 			cq_handle = true;

From c360867ec46a4ec5cb19e5c329d65dff522cc69d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 4 Oct 2018 15:48:03 +0000
Subject: [PATCH 07/26] mlxsw: spectrum: Delete RIF when VLAN device is removed

In commit 602b74eda813 ("mlxsw: spectrum_switchdev: Do not leak RIFs
when removing bridge") I handled the case where RIFs created for VLAN
devices were not properly cleaned up when their real device (a bridge)
was removed.

However, I forgot to handle the case of the VLAN device itself being
removed. Do so now when the VLAN device is being unlinked from its real
device.

Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reported-by: Artem Shvorin <art@qrator.net>
Tested-by: Artem Shvorin <art@qrator.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b492152c8881..30bb2c533cec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4845,6 +4845,8 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
 		upper_dev = info->upper_dev;
 		if (info->linking)
 			break;
+		if (is_vlan_dev(upper_dev))
+			mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev);
 		if (netif_is_macvlan(upper_dev))
 			mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
 		break;

From f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Fri, 28 Sep 2018 17:04:30 -0600
Subject: [PATCH 08/26] net/usb: cancel pending work when unbinding smsc75xx

Cancel pending work before freeing smsc75xx private data structure
during binding. This fixes the following crash in the driver:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000050
IP: mutex_lock+0x2b/0x3f
<snipped>
Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx]
task: ffff8caa83e85700 task.stack: ffff948b80518000
RIP: 0010:mutex_lock+0x2b/0x3f
<snipped>
Call Trace:
 smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx]
 process_one_work+0x18d/0x2fc
 worker_thread+0x1a2/0x269
 ? pr_cont_work+0x58/0x58
 kthread+0xfa/0x10a
 ? pr_cont_work+0x58/0x58
 ? rcu_read_unlock_sched_notrace+0x48/0x48
 ret_from_fork+0x22/0x40

Signed-off-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc75xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index e5a4cbb366dc..ec287c9741e8 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1520,6 +1520,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
 	if (pdata) {
+		cancel_work_sync(&pdata->set_multicast);
 		netif_dbg(dev, ifdown, dev->net, "free pdata\n");
 		kfree(pdata);
 		pdata = NULL;

From 471b83bd8bbe4e89743683ef8ecb78f7029d8288 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 1 Oct 2018 12:21:59 +0300
Subject: [PATCH 09/26] team: Forbid enslaving team device to itself

team's ndo_add_slave() acquires 'team->lock' and later tries to open the
newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event
that causes the VLAN driver to add VLAN 0 on the team device. team's
ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and
deadlock.

Fix this by checking early at the enslavement function that a team
device is not being enslaved to itself.

A similar check was added to the bond driver in commit 09a89c219baf
("bonding: disallow enslaving a bond to itself").

WARNING: possible recursive locking detected
4.18.0-rc7+ #176 Not tainted
--------------------------------------------
syz-executor4/6391 is trying to acquire lock:
(____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868

but task is already holding lock:
(____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&team->lock);
  lock(&team->lock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

2 locks held by syz-executor4/6391:
 #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline]
 #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662
 #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947

stack backtrace:
CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline]
 check_deadlock kernel/locking/lockdep.c:1809 [inline]
 validate_chain kernel/locking/lockdep.c:2405 [inline]
 __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435
 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924
 __mutex_lock_common kernel/locking/mutex.c:757 [inline]
 __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894
 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909
 team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868
 vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210
 __vlan_vid_add net/8021q/vlan_core.c:278 [inline]
 vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308
 vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381
 notifier_call_chain+0x180/0x390 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735
 call_netdevice_notifiers net/core/dev.c:1753 [inline]
 dev_open+0x173/0x1b0 net/core/dev.c:1433
 team_port_add drivers/net/team/team.c:1219 [inline]
 team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948
 do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248
 do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382
 rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636
 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665
 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455
 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683
 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
 netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
 netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908
 sock_sendmsg_nosec net/socket.c:642 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:652
 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126
 __sys_sendmsg+0x11d/0x290 net/socket.c:2164
 __do_sys_sendmsg net/socket.c:2173 [inline]
 __se_sys_sendmsg net/socket.c:2171 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x456b29
Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29
RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004
RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000004d3548 R14: 00000000004c8227 R15: 0000000000000000

Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6a047d30e8c6..d887016e54b6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1167,6 +1167,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
 		return -EBUSY;
 	}
 
+	if (dev == port_dev) {
+		NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself");
+		netdev_err(dev, "Cannot enslave team device to itself\n");
+		return -EINVAL;
+	}
+
 	if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
 	    vlan_uses_dev(dev)) {
 		NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");

From dbe80d446c859873820eedfff4abc61c71f1927b Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 5 Oct 2018 00:26:00 -0400
Subject: [PATCH 10/26] bnxt_en: Fix VNIC reservations on the PF.

The enables bit for VNIC was set wrong when calling the HWRM_FUNC_CFG
firmware call to reserve VNICs.  This has the effect that the firmware
will keep a large number of VNICs for the PF, and having very few for
VFs.  DPDK driver running on the VFs, which requires more VNICs, may not
work properly as a result.

Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0478e562abac..2564a92dcb02 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4650,7 +4650,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
 				      FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
 		enables |= ring_grps ?
 			   FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
-		enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
+		enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0;
 
 		req->num_rx_rings = cpu_to_le16(rx_rings);
 		req->num_hw_ring_grps = cpu_to_le16(ring_grps);

From 5db0e0969af6501ad45fe0494039d3b9c797822b Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Fri, 5 Oct 2018 00:26:01 -0400
Subject: [PATCH 11/26] bnxt_en: Fix enables field in HWRM_QUEUE_COS2BW_CFG
 request

In HWRM_QUEUE_COS2BW_CFG request, enables field should have the bits
set only for the queue ids which are having the valid parameters.

This causes firmware to return error when the TC to hardware CoS queue
mapping is not 1:1 during DCBNL ETS setup.

Fixes: 2e8ef77ee0ff ("bnxt_en: Add TC to hardware QoS queue mapping logic.")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index ddc98c359488..a85d2be986af 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -98,13 +98,13 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
 	for (i = 0; i < max_tc; i++) {
-		u8 qidx;
+		u8 qidx = bp->tc_to_qidx[i];
 
 		req.enables |= cpu_to_le32(
-			QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
+			QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID <<
+			qidx);
 
 		memset(&cos2bw, 0, sizeof(cos2bw));
-		qidx = bp->tc_to_qidx[i];
 		cos2bw.queue_id = bp->q_info[qidx].queue_id;
 		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
 			cos2bw.tsa =

From a2bf74f4e1b82395dad2b08d2a911d9151db71c1 Mon Sep 17 00:00:00 2001
From: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Date: Fri, 5 Oct 2018 00:26:02 -0400
Subject: [PATCH 12/26] bnxt_en: free hwrm resources, if driver probe fails.

When the driver probe fails, all the resources that were allocated prior
to the failure must be freed. However, hwrm dma response memory is not
getting freed.

This patch fixes the problem described above.

Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2564a92dcb02..3718984a8185 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3017,10 +3017,11 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp)
 {
 	struct pci_dev *pdev = bp->pdev;
 
-	dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
-			  bp->hwrm_cmd_resp_dma_addr);
-
-	bp->hwrm_cmd_resp_addr = NULL;
+	if (bp->hwrm_cmd_resp_addr) {
+		dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
+				  bp->hwrm_cmd_resp_dma_addr);
+		bp->hwrm_cmd_resp_addr = NULL;
+	}
 }
 
 static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
@@ -9057,6 +9058,7 @@ init_err_cleanup_tc:
 	bnxt_clear_int_mode(bp);
 
 init_err_pci_clean:
+	bnxt_free_hwrm_resources(bp);
 	bnxt_cleanup_pci(bp);
 
 init_err_free:

From c78fe058879bdea919d44f23e21da26f603e9166 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Fri, 5 Oct 2018 00:26:03 -0400
Subject: [PATCH 13/26] bnxt_en: get the reduced max_irqs by the ones used by
 RDMA

When getting the max rings supported, get the reduced max_irqs
by the ones used by RDMA.

If the number MSIX is the limiting factor, this bug may cause the
max ring count to be higher than it should be when RDMA driver is
loaded and may result in ring allocation failures.

Fixes: 30f529473ec9 ("bnxt_en: Do not modify max IRQ count after RDMA driver requests/frees IRQs.")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3718984a8185..e2d92548226a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8622,7 +8622,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
 	*max_tx = hw_resc->max_tx_rings;
 	*max_rx = hw_resc->max_rx_rings;
 	*max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp),
-			hw_resc->max_irqs);
+			hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp));
 	*max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs);
 	max_ring_grps = hw_resc->max_hw_ring_grps;
 	if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {

From 17c357efe5eceebdc3971a48b3d4d61a03c1178b Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Fri, 28 Sep 2018 14:51:28 -0300
Subject: [PATCH 14/26] openvswitch: load NAT helper

Load the respective NAT helper module if the flow uses it.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 0aeb34c6389d..35ae64cbef33 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1312,6 +1312,10 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 
 	rcu_assign_pointer(help->helper, helper);
 	info->helper = helper;
+
+	if (info->nat)
+		request_module("ip_nat_%s", name);
+
 	return 0;
 }
 

From ca8931948344c485569b04821d1f6bcebccd376b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 4 Oct 2018 20:24:13 -0700
Subject: [PATCH 15/26] net: dsa: b53: Keep CPU port as tagged in all VLANs

Commit c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port
incorrectly") was a bit too trigger happy in removing the CPU port from
the VLAN membership because we rely on DSA to program the CPU port VLAN,
which it does, except it does not bother itself with tagged/untagged and
just usese untagged.

Having the CPU port "follow" the user ports tagged/untagged is not great
and does not allow for properly differentiating, so keep the CPU port
tagged in all VLANs.

Reported-by: Gerhard Wiesinger <lists@wiesinger.com>
Fixes: c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port incorrectly")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index d93c790bfbe8..ad534b90ef21 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1107,7 +1107,7 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
 		b53_get_vlan_entry(dev, vid, vl);
 
 		vl->members |= BIT(port);
-		if (untagged)
+		if (untagged && !dsa_is_cpu_port(ds, port))
 			vl->untag |= BIT(port);
 		else
 			vl->untag &= ~BIT(port);
@@ -1149,7 +1149,7 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
 				pvid = 0;
 		}
 
-		if (untagged)
+		if (untagged && !dsa_is_cpu_port(ds, port))
 			vl->untag &= ~(BIT(port));
 
 		b53_set_vlan_entry(dev, vid, vl);

From 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 Mon Sep 17 00:00:00 2001
From: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
Date: Sat, 29 Sep 2018 15:41:27 +0000
Subject: [PATCH 16/26] net/packet: fix packet drop as of virtio gso

When we use raw socket as the vhost backend, a packet from virito with
gso offloading information, cannot be sent out in later validaton at
xmit path, as we did not set correct skb->protocol which is further used
for looking up the gso function.

To fix this, we set this field according to virito hdr information.

Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 18 ++++++++++++++++++
 net/packet/af_packet.c     | 11 +++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 9397628a1967..cb462f9ab7dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -5,6 +5,24 @@
 #include <linux/if_vlan.h>
 #include <uapi/linux/virtio_net.h>
 
+static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
+					   const struct virtio_net_hdr *hdr)
+{
+	switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+	case VIRTIO_NET_HDR_GSO_TCPV4:
+	case VIRTIO_NET_HDR_GSO_UDP:
+		skb->protocol = cpu_to_be16(ETH_P_IP);
+		break;
+	case VIRTIO_NET_HDR_GSO_TCPV6:
+		skb->protocol = cpu_to_be16(ETH_P_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 					const struct virtio_net_hdr *hdr,
 					bool little_endian)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 75c92a87e7b2..d6e94dc7e290 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2715,10 +2715,12 @@ tpacket_error:
 			}
 		}
 
-		if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
-							      vio_le())) {
-			tp_len = -EINVAL;
-			goto tpacket_error;
+		if (po->has_vnet_hdr) {
+			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
+				tp_len = -EINVAL;
+				goto tpacket_error;
+			}
+			virtio_net_hdr_set_proto(skb, vnet_hdr);
 		}
 
 		skb->destructor = tpacket_destruct_skb;
@@ -2915,6 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		if (err)
 			goto out_free;
 		len += sizeof(vnet_hdr);
+		virtio_net_hdr_set_proto(skb, &vnet_hdr);
 	}
 
 	skb_probe_transport_header(skb, reserve);

From 2d52527e80c2dc0c5f43f50adf183781262ec565 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 3 Oct 2018 15:20:58 +0200
Subject: [PATCH 17/26] be2net: don't flip hw_features when VXLANs are
 added/deleted

the be2net implementation of .ndo_tunnel_{add,del}() changes the value of
NETIF_F_GSO_UDP_TUNNEL bit in 'features' and 'hw_features', but it forgets
to call netdev_features_change(). Moreover, ethtool setting for that bit
can potentially be reverted after a tunnel is added or removed.

GSO already does software segmentation when 'hw_enc_features' is 0, even
if VXLAN offload is turned on. In addition, commit 096de2f83ebc ("benet:
stricter vxlan offloading check in be_features_check") avoids hardware
segmentation of non-VXLAN tunneled packets, or VXLAN packets having wrong
destination port. So, it's safe to avoid flipping the above feature on
addition/deletion of VXLAN tunnels.

Fixes: 630f4b70567f ("be2net: Export tunnel offloads only when a VxLAN tunnel is created")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 74d122616e76..534787291b44 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4002,8 +4002,6 @@ static int be_enable_vxlan_offloads(struct be_adapter *adapter)
 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 				   NETIF_F_TSO | NETIF_F_TSO6 |
 				   NETIF_F_GSO_UDP_TUNNEL;
-	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
-	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
 
 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
 		 be16_to_cpu(port));
@@ -4025,8 +4023,6 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter)
 	adapter->vxlan_port = 0;
 
 	netdev->hw_enc_features = 0;
-	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
-	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
 }
 
 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
@@ -5320,6 +5316,7 @@ static void be_netdev_init(struct net_device *netdev)
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
+		NETIF_F_GSO_UDP_TUNNEL |
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
 		NETIF_F_HW_VLAN_CTAG_TX;
 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))

From 7e4183752735deb7543e179a44f4f4b44917cd6f Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Wed, 3 Oct 2018 19:04:49 +0300
Subject: [PATCH 18/26] net: phy: phylink: fix SFP interface autodetection

When connecting SFP PHY to phylink use the detected interface.
Otherwise, the link fails to come up when the configured 'phy-mode'
differs from the SFP detected mode.

Move most of phylink_connect_phy() into __phylink_connect_phy(), and
leave phylink_connect_phy() as a wrapper. phylink_sfp_connect_phy() can
now pass the SFP detected PHY interface to __phylink_connect_phy().

This fixes 1GB SFP module link up on eth3 of the Macchiatobin board that
is configured in the DT to "2500base-x" phy-mode.

Fixes: 9525ae83959b6 ("phylink: add phylink infrastructure")
Suggested-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 48 +++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 3ba5cf2a8a5f..7abca86c3aa9 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -717,6 +717,30 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 	return 0;
 }
 
+static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy,
+		phy_interface_t interface)
+{
+	int ret;
+
+	if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
+		    (pl->link_an_mode == MLO_AN_INBAND &&
+		     phy_interface_mode_is_8023z(interface))))
+		return -EINVAL;
+
+	if (pl->phydev)
+		return -EBUSY;
+
+	ret = phy_attach_direct(pl->netdev, phy, 0, interface);
+	if (ret)
+		return ret;
+
+	ret = phylink_bringup_phy(pl, phy);
+	if (ret)
+		phy_detach(phy);
+
+	return ret;
+}
+
 /**
  * phylink_connect_phy() - connect a PHY to the phylink instance
  * @pl: a pointer to a &struct phylink returned from phylink_create()
@@ -734,31 +758,13 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
  */
 int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
 {
-	int ret;
-
-	if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
-		    (pl->link_an_mode == MLO_AN_INBAND &&
-		     phy_interface_mode_is_8023z(pl->link_interface))))
-		return -EINVAL;
-
-	if (pl->phydev)
-		return -EBUSY;
-
 	/* Use PHY device/driver interface */
 	if (pl->link_interface == PHY_INTERFACE_MODE_NA) {
 		pl->link_interface = phy->interface;
 		pl->link_config.interface = pl->link_interface;
 	}
 
-	ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);
-	if (ret)
-		return ret;
-
-	ret = phylink_bringup_phy(pl, phy);
-	if (ret)
-		phy_detach(phy);
-
-	return ret;
+	return __phylink_connect_phy(pl, phy, pl->link_interface);
 }
 EXPORT_SYMBOL_GPL(phylink_connect_phy);
 
@@ -1672,7 +1678,9 @@ static void phylink_sfp_link_up(void *upstream)
 
 static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
 {
-	return phylink_connect_phy(upstream, phy);
+	struct phylink *pl = upstream;
+
+	return __phylink_connect_phy(upstream, phy, pl->link_config.interface);
 }
 
 static void phylink_sfp_disconnect_phy(void *upstream)

From b799207e1e1816b09e7a5920fbb2d5fcf6edd681 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 5 Oct 2018 18:17:59 +0200
Subject: [PATCH 19/26] bpf: 32-bit RSH verification must truncate input before
 the ALU op

When I wrote commit 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification"), I
assumed that, in order to emulate 64-bit arithmetic with 32-bit logic, it
is sufficient to just truncate the output to 32 bits; and so I just moved
the register size coercion that used to be at the start of the function to
the end of the function.

That assumption is true for almost every op, but not for 32-bit right
shifts, because those can propagate information towards the least
significant bit. Fix it by always truncating inputs for 32-bit ops to 32
bits.

Also get rid of the coerce_reg_to_size() after the ALU op, since that has
no effect.

Fixes: 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb07e74b34a2..465952a8e465 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2896,6 +2896,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	u64 umin_val, umax_val;
 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 
+	if (insn_bitness == 32) {
+		/* Relevant for 32-bit RSH: Information can propagate towards
+		 * LSB, so it isn't sufficient to only truncate the output to
+		 * 32 bits.
+		 */
+		coerce_reg_to_size(dst_reg, 4);
+		coerce_reg_to_size(&src_reg, 4);
+	}
+
 	smin_val = src_reg.smin_value;
 	smax_val = src_reg.smax_value;
 	umin_val = src_reg.umin_value;
@@ -3131,7 +3140,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
 		/* 32-bit ALU ops are (32,32)->32 */
 		coerce_reg_to_size(dst_reg, 4);
-		coerce_reg_to_size(&src_reg, 4);
 	}
 
 	__reg_deduce_bounds(dst_reg);

From 2c05d88818ab6571816b93edce4d53703870d7ae Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Fri, 5 Oct 2018 08:48:27 -0500
Subject: [PATCH 20/26] net: cxgb3_main: fix a missing-check bug

In cxgb_extension_ioctl(), the command of the ioctl is firstly copied from
the user-space buffer 'useraddr' to 'cmd' and checked through the
switch statement. If the command is not as expected, an error code
EOPNOTSUPP is returned. In the following execution, i.e., the cases of the
switch statement, the whole buffer of 'useraddr' is copied again to a
specific data structure, according to what kind of command is requested.
However, after the second copy, there is no re-check on the newly-copied
command. Given that the buffer 'useraddr' is in the user space, a malicious
user can race to change the command between the two copies. By doing so,
the attacker can supply malicious data to the kernel and cause undefined
behavior.

This patch adds a re-check in each case of the switch statement if there is
a second copy in that case, to re-check whether the command obtained in the
second copy is the same as the one in the first copy. If not, an error code
EINVAL is returned.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index a19172dbe6be..c34ea385fe4a 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -2159,6 +2159,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EPERM;
 		if (copy_from_user(&t, useraddr, sizeof(t)))
 			return -EFAULT;
+		if (t.cmd != CHELSIO_SET_QSET_PARAMS)
+			return -EINVAL;
 		if (t.qset_idx >= SGE_QSETS)
 			return -EINVAL;
 		if (!in_range(t.intr_lat, 0, M_NEWTIMER) ||
@@ -2258,6 +2260,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 		if (copy_from_user(&t, useraddr, sizeof(t)))
 			return -EFAULT;
 
+		if (t.cmd != CHELSIO_GET_QSET_PARAMS)
+			return -EINVAL;
+
 		/* Display qsets for all ports when offload enabled */
 		if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
 			q1 = 0;
@@ -2303,6 +2308,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EBUSY;
 		if (copy_from_user(&edata, useraddr, sizeof(edata)))
 			return -EFAULT;
+		if (edata.cmd != CHELSIO_SET_QSET_NUM)
+			return -EINVAL;
 		if (edata.val < 1 ||
 			(edata.val > 1 && !(adapter->flags & USING_MSIX)))
 			return -EINVAL;
@@ -2343,6 +2350,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EPERM;
 		if (copy_from_user(&t, useraddr, sizeof(t)))
 			return -EFAULT;
+		if (t.cmd != CHELSIO_LOAD_FW)
+			return -EINVAL;
 		/* Check t.len sanity ? */
 		fw_data = memdup_user(useraddr + sizeof(t), t.len);
 		if (IS_ERR(fw_data))
@@ -2366,6 +2375,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EBUSY;
 		if (copy_from_user(&m, useraddr, sizeof(m)))
 			return -EFAULT;
+		if (m.cmd != CHELSIO_SETMTUTAB)
+			return -EINVAL;
 		if (m.nmtus != NMTUS)
 			return -EINVAL;
 		if (m.mtus[0] < 81)	/* accommodate SACK */
@@ -2407,6 +2418,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EBUSY;
 		if (copy_from_user(&m, useraddr, sizeof(m)))
 			return -EFAULT;
+		if (m.cmd != CHELSIO_SET_PM)
+			return -EINVAL;
 		if (!is_power_of_2(m.rx_pg_sz) ||
 			!is_power_of_2(m.tx_pg_sz))
 			return -EINVAL;	/* not power of 2 */
@@ -2440,6 +2453,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EIO;	/* need the memory controllers */
 		if (copy_from_user(&t, useraddr, sizeof(t)))
 			return -EFAULT;
+		if (t.cmd != CHELSIO_GET_MEM)
+			return -EINVAL;
 		if ((t.addr & 7) || (t.len & 7))
 			return -EINVAL;
 		if (t.mem_id == MEM_CM)
@@ -2492,6 +2507,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 			return -EAGAIN;
 		if (copy_from_user(&t, useraddr, sizeof(t)))
 			return -EFAULT;
+		if (t.cmd != CHELSIO_SET_TRACE_FILTER)
+			return -EINVAL;
 
 		tp = (const struct trace_params *)&t.sip;
 		if (t.config_tx)

From 33aa8da1f8a7dc050b9d68f1db761ab787621065 Mon Sep 17 00:00:00 2001
From: Shanthosh RK <shanthosh.rk@gmail.com>
Date: Fri, 5 Oct 2018 20:57:48 +0530
Subject: [PATCH 21/26] net: bpfilter: Fix type cast and pointer warnings

Fixes the following Sparse warnings:

net/bpfilter/bpfilter_kern.c:62:21: warning: cast removes address space
of expression
net/bpfilter/bpfilter_kern.c:101:49: warning: Using plain integer as
NULL pointer

Signed-off-by: Shanthosh RK <shanthosh.rk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bpfilter/bpfilter_kern.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index f0fc182d3db7..b64e1649993b 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -59,7 +59,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 	req.is_set = is_set;
 	req.pid = current->pid;
 	req.cmd = optname;
-	req.addr = (long)optval;
+	req.addr = (long __force __user)optval;
 	req.len = optlen;
 	mutex_lock(&bpfilter_lock);
 	if (!info.pid)
@@ -98,7 +98,7 @@ static int __init load_umh(void)
 	pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
 
 	/* health check that usermode process started correctly */
-	if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) {
+	if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
 		stop_umh();
 		return -EFAULT;
 	}

From 0781168e23a2fc8dceb989f11fc5b39b3ccacc35 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wang6495@umn.edu>
Date: Fri, 5 Oct 2018 10:59:36 -0500
Subject: [PATCH 22/26] yam: fix a missing-check bug

In yam_ioctl(), the concrete ioctl command is firstly copied from the
user-space buffer 'ifr->ifr_data' to 'ioctl_cmd' and checked through the
following switch statement. If the command is not as expected, an error
code EINVAL is returned. In the following execution the buffer
'ifr->ifr_data' is copied again in the cases of the switch statement to
specific data structures according to what kind of ioctl command is
requested. However, after the second copy, no re-check is enforced on the
newly-copied command. Given that the buffer 'ifr->ifr_data' is in the user
space, a malicious user can race to change the command between the two
copies. This way, the attacker can inject inconsistent data and cause
undefined behavior.

This patch adds a re-check in each case of the switch statement if there is
a second copy in that case, to re-check whether the command obtained in the
second copy is the same as the one in the first copy. If not, an error code
EINVAL will be returned.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/yam.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 16ec7af6ab7b..ba9df430fca6 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -966,6 +966,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 				 sizeof(struct yamdrv_ioctl_mcs));
 		if (IS_ERR(ym))
 			return PTR_ERR(ym);
+		if (ym->cmd != SIOCYAMSMCS)
+			return -EINVAL;
 		if (ym->bitrate > YAM_MAXBITRATE) {
 			kfree(ym);
 			return -EINVAL;
@@ -981,6 +983,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg)))
 			 return -EFAULT;
 
+		if (yi.cmd != SIOCYAMSCFG)
+			return -EINVAL;
 		if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev))
 			return -EINVAL;		/* Cannot change this parameter when up */
 		if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev))

From bd961c9bc66497f0c63f4ba1d02900bb85078366 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mfo@canonical.com>
Date: Mon, 1 Oct 2018 22:46:40 -0300
Subject: [PATCH 23/26] rtnetlink: fix rtnl_fdb_dump() for ndmsg header

Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg',
which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh').

The problem is, the function bails out early if nlmsg_parse() fails, which
does occur for iproute2 usage of 'struct ndmsg' because the payload length
is shorter than the family header alone (as 'struct ifinfomsg' is assumed).

This breaks backward compatibility with userspace -- nothing is sent back.

Some examples with iproute2 and netlink library for go [1]:

 1) $ bridge fdb show
    33:33:00:00:00:01 dev ens3 self permanent
    01:00:5e:00:00:01 dev ens3 self permanent
    33:33:ff:15:98:30 dev ens3 self permanent

      This one works, as it uses 'struct ifinfomsg'.

      fdb_show() @ iproute2/bridge/fdb.c
        """
        .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
        ...
        if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...]
        """

 2) $ ip --family bridge neigh
    RTNETLINK answers: Invalid argument
    Dump terminated

      This one fails, as it uses 'struct ndmsg'.

      do_show_or_flush() @ iproute2/ip/ipneigh.c
        """
        .n.nlmsg_type = RTM_GETNEIGH,
        .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
        """

 3) $ ./neighlist
    < no output >

      This one fails, as it uses 'struct ndmsg'-based.

      neighList() @ netlink/neigh_linux.go
        """
        req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...]
        msg := Ndmsg{
        """

The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink:
bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails
if the payload length (with the _actual_ family header) is less than the
family header length alone (which is assumed, in parameter 'hdrlen').
This is true in the examples above with struct ndmsg, with size and payload
length shorter than struct ifinfomsg.

However, that commit just intends to fix something under the assumption the
family header is indeed an 'struct ifinfomsg' - by preventing access to the
payload as such (via 'ifm' pointer) if the payload length is not sufficient
to actually contain it.

The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump
interface at par with brctl"), to support iproute2's 'bridge fdb' command
(not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken.

So, in order to unbreak the 'struct ndmsg' family headers and still allow
'struct ifinfomsg' to continue to work, check for the known message sizes
used with 'struct ndmsg' in iproute2 (with zero or one attribute which is
not used in this function anyway) then do not parse the data as ifinfomsg.

Same examples with this patch applied (or revert/before the original fix):

    $ bridge fdb show
    33:33:00:00:00:01 dev ens3 self permanent
    01:00:5e:00:00:01 dev ens3 self permanent
    33:33:ff:15:98:30 dev ens3 self permanent

    $ ip --family bridge neigh
    dev ens3 lladdr 33:33:00:00:00:01 PERMANENT
    dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT
    dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT

    $ ./neighlist
    netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
    netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}
    netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0}

Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068).

References:

[1] netlink library for go (test-case)
    https://github.com/vishvananda/netlink

    $ cat ~/go/src/neighlist/main.go
    package main
    import ("fmt"; "syscall"; "github.com/vishvananda/netlink")
    func main() {
        neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE)
        for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) }
    }

    $ export GOPATH=~/go
    $ go get github.com/vishvananda/netlink
    $ go build neighlist
    $ ~/go/src/neighlist/neighlist

Thanks to David Ahern for suggestions to improve this patch.

Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error")
Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
Reported-by: Aidan Obley <aobley@pivotal.io>
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 448703312fed..37c7936124e6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3748,16 +3748,27 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int err = 0;
 	int fidx = 0;
 
-	err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
-			  IFLA_MAX, ifla_policy, NULL);
-	if (err < 0) {
-		return -EINVAL;
-	} else if (err == 0) {
-		if (tb[IFLA_MASTER])
-			br_idx = nla_get_u32(tb[IFLA_MASTER]);
-	}
+	/* A hack to preserve kernel<->userspace interface.
+	 * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
+	 * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails.
+	 * So, check for ndmsg with an optional u32 attribute (not used here).
+	 * Fortunately these sizes don't conflict with the size of ifinfomsg
+	 * with an optional attribute.
+	 */
+	if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
+	    (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
+	     nla_attr_size(sizeof(u32)))) {
+		err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+				  IFLA_MAX, ifla_policy, NULL);
+		if (err < 0) {
+			return -EINVAL;
+		} else if (err == 0) {
+			if (tb[IFLA_MASTER])
+				br_idx = nla_get_u32(tb[IFLA_MASTER]);
+		}
 
-	brport_idx = ifm->ifi_index;
+		brport_idx = ifm->ifi_index;
+	}
 
 	if (br_idx) {
 		br_dev = __dev_get_by_index(net, br_idx);

From 8b4c3cdd9dd8290343ce959a132d3b334062c5b9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 3 Oct 2018 15:05:36 -0700
Subject: [PATCH 24/26] net: sched: Add policy validation for tc attributes

A number of TC attributes are processed without proper validation
(e.g., length checks). Add a tca policy for all input attributes and use
when invoking nlmsg_parse.

The 2 Fixes tags below cover the latest additions. The other attributes
are a string (KIND), nested attribute (OPTIONS which does seem to have
validation in most cases), for dumps only or a flag.

Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters")
Fixes: d47a6b0e7c492 ("net: sched: introduce ingress/egress block index attributes for qdisc")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 98541c6399db..85e73f48e48f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1311,6 +1311,18 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
  * Delete/get qdisc.
  */
 
+const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
+	[TCA_KIND]		= { .type = NLA_STRING },
+	[TCA_OPTIONS]		= { .type = NLA_NESTED },
+	[TCA_RATE]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct tc_estimator) },
+	[TCA_STAB]		= { .type = NLA_NESTED },
+	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
+	[TCA_CHAIN]		= { .type = NLA_U32 },
+	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
+	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
+};
+
 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 			struct netlink_ext_ack *extack)
 {
@@ -1327,7 +1339,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+			  extack);
 	if (err < 0)
 		return err;
 
@@ -1411,7 +1424,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 
 replay:
 	/* Reinit, just in case something touches this. */
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+			  extack);
 	if (err < 0)
 		return err;
 
@@ -1645,7 +1659,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	idx = 0;
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
+	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
+			  rtm_tca_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1864,7 +1879,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
+			  extack);
 	if (err < 0)
 		return err;
 

From a688caa34beb2fd2a92f1b6d33e40cde433ba160 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Thu, 4 Oct 2018 10:12:37 -0700
Subject: [PATCH 25/26] ipv6: take rcu lock in rawv6_send_hdrinc()

In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we
directly assign the dst to skb and set passed in dst to NULL to avoid
double free.
However, in error case, we free skb and then do stats update with the
dst pointer passed in. This causes use-after-free on the dst.
Fix it by taking rcu read lock right before dst could get released to
make sure dst does not get freed until the stats update is done.
Note: we don't have this issue in ipv4 cause dst is not used for stats
update in v4.

Syzkaller reported following crash:
BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline]
BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921
Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088

CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
 rawv6_send_hdrinc net/ipv6/raw.c:692 [inline]
 rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:621 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:631
 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114
 __sys_sendmsg+0x11d/0x280 net/socket.c:2152
 __do_sys_sendmsg net/socket.c:2161 [inline]
 __se_sys_sendmsg net/socket.c:2159 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457099
Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f83756edc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099
RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004
RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000

Allocated by task 32088:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553
 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
 kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554
 dst_alloc+0xbb/0x1d0 net/core/dst.c:105
 ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353
 ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186
 ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895
 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093
 fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122
 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121
 ip6_route_output include/net/ip6_route.h:88 [inline]
 ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951
 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079
 rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:621 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:631
 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114
 __sys_sendmsg+0x11d/0x280 net/socket.c:2152
 __do_sys_sendmsg net/socket.c:2161 [inline]
 __se_sys_sendmsg net/socket.c:2159 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 5356:
 save_stack+0x43/0xd0 mm/kasan/kasan.c:448
 set_track mm/kasan/kasan.c:460 [inline]
 __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521
 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
 __cache_free mm/slab.c:3498 [inline]
 kmem_cache_free+0x83/0x290 mm/slab.c:3756
 dst_destroy+0x267/0x3c0 net/core/dst.c:141
 dst_destroy_rcu+0x16/0x19 net/core/dst.c:154
 __rcu_reclaim kernel/rcu/rcu.h:236 [inline]
 rcu_do_batch kernel/rcu/tree.c:2576 [inline]
 invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline]
 __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline]
 rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864
 __do_softirq+0x30b/0xad8 kernel/softirq.c:292

Fixes: 1789a640f556 ("raw: avoid two atomics in xmit")
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 413d98bf24f4..5e0efd3954e9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 	skb->tstamp = sockc->transmit_time;
-	skb_dst_set(skb, &rt->dst);
-	*dstp = NULL;
 
 	skb_put(skb, length);
 	skb_reset_network_header(skb);
@@ -665,8 +663,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 
 	skb->transport_header = skb->network_header;
 	err = memcpy_from_msg(iph, msg, length);
-	if (err)
-		goto error_fault;
+	if (err) {
+		err = -EFAULT;
+		kfree_skb(skb);
+		goto error;
+	}
+
+	skb_dst_set(skb, &rt->dst);
+	*dstp = NULL;
 
 	/* if egress device is enslaved to an L3 master device pass the
 	 * skb to its handler for processing
@@ -675,21 +679,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	if (unlikely(!skb))
 		return 0;
 
+	/* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev
+	 * in the error path. Since skb has been freed, the dst could
+	 * have been queued for deletion.
+	 */
+	rcu_read_lock();
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
 		      NULL, rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
-	if (err)
-		goto error;
+	if (err) {
+		IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+		rcu_read_unlock();
+		goto error_check;
+	}
+	rcu_read_unlock();
 out:
 	return 0;
 
-error_fault:
-	err = -EFAULT;
-	kfree_skb(skb);
 error:
 	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+error_check:
 	if (err == -ENOBUFS && !np->recverr)
 		err = 0;
 	return err;

From 35f3625c21852ad839f20c91c7d81c4c1101e207 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Fri, 5 Oct 2018 09:04:40 +0200
Subject: [PATCH 26/26] net: mvpp2: Extract the correct ethtype from the skb
 for tx csum offload

When offloading the L3 and L4 csum computation on TX, we need to extract
the l3_proto from the ethtype, independently of the presence of a vlan
tag.

The actual driver uses skb->protocol as-is, resulting in packets with
the wrong L4 checksum being sent when there's a vlan tag in the packet
header and checksum offloading is enabled.

This commit makes use of vlan_protocol_get() to get the correct ethtype
regardless the presence of a vlan tag.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 38cc01beea79..a74002b43b51 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -1725,7 +1725,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq)
 }
 
 /* Set Tx descriptors fields relevant for CSUM calculation */
-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,
+static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto,
 			       int ip_hdr_len, int l4_proto)
 {
 	u32 command;
@@ -2600,14 +2600,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int ip_hdr_len = 0;
 		u8 l4_proto;
+		__be16 l3_proto = vlan_get_protocol(skb);
 
-		if (skb->protocol == htons(ETH_P_IP)) {
+		if (l3_proto == htons(ETH_P_IP)) {
 			struct iphdr *ip4h = ip_hdr(skb);
 
 			/* Calculate IPv4 checksum and L4 checksum */
 			ip_hdr_len = ip4h->ihl;
 			l4_proto = ip4h->protocol;
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		} else if (l3_proto == htons(ETH_P_IPV6)) {
 			struct ipv6hdr *ip6h = ipv6_hdr(skb);
 
 			/* Read l4_protocol from one of IPv6 extra headers */
@@ -2619,7 +2620,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
 		}
 
 		return mvpp2_txq_desc_csum(skb_network_offset(skb),
-				skb->protocol, ip_hdr_len, l4_proto);
+					   l3_proto, ip_hdr_len, l4_proto);
 	}
 
 	return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;