From 3b9ea7206d7e1fdd7419cbd10badd3b2c80d04b4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 14 Feb 2021 19:49:11 +0100
Subject: [PATCH 001/164] ath9k: fix transmitting to stations in dynamic SMPS
 mode

When transmitting to a receiver in dynamic SMPS mode, all transmissions that
use multiple spatial streams need to be sent using CTS-to-self or RTS/CTS to
give the receiver's extra chains some time to wake up.
This fixes the tx rate getting stuck at <= MCS7 for some clients, especially
Intel ones, which make aggressive use of SMPS.

Cc: stable@vger.kernel.org
Reported-by: Martin Kennedy <hurricos@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210214184911.96702-1-nbd@nbd.name
---
 drivers/net/wireless/ath/ath9k/ath9k.h | 3 ++-
 drivers/net/wireless/ath/ath9k/xmit.c  | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 13b4f5f50f8a..ef6f5ea06c1f 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -177,7 +177,8 @@ struct ath_frame_info {
 	s8 txq;
 	u8 keyix;
 	u8 rtscts_rate;
-	u8 retries : 7;
+	u8 retries : 6;
+	u8 dyn_smps : 1;
 	u8 baw_tracked : 1;
 	u8 tx_power;
 	enum ath9k_key_type keytype:2;
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index e60d4737fc6e..5691bd6eb82c 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -1271,6 +1271,11 @@ static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf,
 				 is_40, is_sgi, is_sp);
 			if (rix < 8 && (tx_info->flags & IEEE80211_TX_CTL_STBC))
 				info->rates[i].RateFlags |= ATH9K_RATESERIES_STBC;
+			if (rix >= 8 && fi->dyn_smps) {
+				info->rates[i].RateFlags |=
+					ATH9K_RATESERIES_RTS_CTS;
+				info->flags |= ATH9K_TXDESC_CTSENA;
+			}
 
 			info->txpower[i] = ath_get_rate_txpower(sc, bf, rix,
 								is_40, false);
@@ -2114,6 +2119,7 @@ static void setup_frame_info(struct ieee80211_hw *hw,
 		fi->keyix = an->ps_key;
 	else
 		fi->keyix = ATH9K_TXKEYIX_INVALID;
+	fi->dyn_smps = sta && sta->smps_mode == IEEE80211_SMPS_DYNAMIC;
 	fi->keytype = keytype;
 	fi->framelen = framelen;
 	fi->tx_power = txpower;

From ae064fc0e32a4d28389086d9f4b260a0c157cfee Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 16 Feb 2021 14:51:18 +0100
Subject: [PATCH 002/164] mt76: fix tx skb error handling in
 mt76_dma_tx_queue_skb

When running out of room in the tx queue after calling drv->tx_prepare_skb,
the buffer list will already have been modified on MT7615 and newer drivers.
This can leak a DMA mapping and will show up as swiotlb allocation failures
on x86.

Fix this by moving the queue length check further up. This is less accurate,
since it can overestimate the needed room in the queue on MT7615 and newer,
but the difference is small enough to not matter in practice.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210216135119.23809-1-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/dma.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 19098b852d0a..abdc8d364361 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -345,7 +345,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 	};
 	struct ieee80211_hw *hw;
 	int len, n = 0, ret = -ENOMEM;
-	struct mt76_queue_entry e;
 	struct mt76_txwi_cache *t;
 	struct sk_buff *iter;
 	dma_addr_t addr;
@@ -387,6 +386,11 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 	}
 	tx_info.nbuf = n;
 
+	if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
+		ret = -ENOMEM;
+		goto unmap;
+	}
+
 	dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size,
 				DMA_TO_DEVICE);
 	ret = dev->drv->tx_prepare_skb(dev, txwi, q->qid, wcid, sta, &tx_info);
@@ -395,11 +399,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 	if (ret < 0)
 		goto unmap;
 
-	if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
-		ret = -ENOMEM;
-		goto unmap;
-	}
-
 	return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf,
 				tx_info.info, tx_info.skb, t);
 
@@ -419,9 +418,7 @@ free:
 	}
 #endif
 
-	e.skb = tx_info.skb;
-	e.txwi = t;
-	dev->drv->tx_complete_skb(dev, &e);
+	dev_kfree_skb(tx_info.skb);
 	mt76_put_txwi(dev, t);
 	return ret;
 }

From 94f0e6256c2ab6803c935634aa1f653174c94879 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 16 Feb 2021 14:51:19 +0100
Subject: [PATCH 003/164] mt76: mt7915: only modify tx buffer list after
 allocating tx token id

Modifying the tx buffer list too early can leak DMA mappings

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210216135119.23809-2-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index eb889f8d6fea..e5a258958ac9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -967,11 +967,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	}
 	txp->nbuf = nbuf;
 
-	/* pass partial skb header to fw */
-	tx_info->buf[1].len = MT_CT_PARSE_LEN;
-	tx_info->buf[1].skip_unmap = true;
-	tx_info->nbuf = MT_CT_DMA_BUF_NUM;
-
 	txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD | MT_CT_INFO_FROM_HOST);
 
 	if (!key)
@@ -1009,6 +1004,11 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 		txp->rept_wds_wcid = cpu_to_le16(0x3ff);
 	tx_info->skb = DMA_DUMMY_DATA;
 
+	/* pass partial skb header to fw */
+	tx_info->buf[1].len = MT_CT_PARSE_LEN;
+	tx_info->buf[1].skip_unmap = true;
+	tx_info->nbuf = MT_CT_DMA_BUF_NUM;
+
 	return 0;
 }
 

From 4538c5ed0f7e892f1b643472e48146757d1e60c5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 19 Feb 2021 13:35:07 +0100
Subject: [PATCH 004/164] iwlwifi: avoid crash on unsupported debug collection

If the opmode doesn't support debug collection (DVM) then don't
crash, but just skip the callback.

Fixes: d01293154c0a ("iwlwifi: dbg: add op_mode callback for collecting debug data.")
Reported-by: Andy Lavr <andy.lavr@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210219133506.ecabe285bc7d.I73d230d555c595fa2d9bf284f80078729fe18aa4@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
index 868da7e79a45..e6d2e0994317 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
@@ -205,6 +205,8 @@ static inline void iwl_op_mode_time_point(struct iwl_op_mode *op_mode,
 					  enum iwl_fw_ini_time_point tp_id,
 					  union iwl_dbg_tlv_tp_data *tp_data)
 {
+	if (!op_mode || !op_mode->ops || !op_mode->ops->time_point)
+		return;
 	op_mode->ops->time_point(op_mode, tp_id, tp_data);
 }
 

From b29dd96b905f3dd543f4ca729447286adf934dd6 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Tue, 16 Feb 2021 12:53:07 +0000
Subject: [PATCH 005/164] bpf, x86: Fix BPF_FETCH atomic and/or/xor with r0 as
 src

This code generates a CMPXCHG loop in order to implement atomic_fetch
bitwise operations. Because CMPXCHG is hard-coded to use rax (which
holds the BPF r0 value), it saves the _real_ r0 value into the
internal "ax" temporary register and restores it once the loop is
complete.

In the middle of the loop, the actual bitwise operation is performed
using src_reg. The bug occurs when src_reg is r0: as described above,
r0 has been clobbered and the real r0 value is in the ax register.

Therefore, perform this operation on the ax register instead, when
src_reg is r0.

Fixes: 981f94c3e921 ("bpf: Add bitwise atomic instructions")
Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20210216125307.1406237-1-jackmanb@google.com
---
 arch/x86/net/bpf_jit_comp.c                   | 10 +++++---
 .../selftests/bpf/verifier/atomic_and.c       | 23 +++++++++++++++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79e7a0ec1da5..6926d0ca6c71 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1349,6 +1349,7 @@ st:			if (is_imm8(insn->off))
 			    insn->imm == (BPF_XOR | BPF_FETCH)) {
 				u8 *branch_target;
 				bool is64 = BPF_SIZE(insn->code) == BPF_DW;
+				u32 real_src_reg = src_reg;
 
 				/*
 				 * Can't be implemented with a single x86 insn.
@@ -1357,6 +1358,9 @@ st:			if (is_imm8(insn->off))
 
 				/* Will need RAX as a CMPXCHG operand so save R0 */
 				emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
+				if (src_reg == BPF_REG_0)
+					real_src_reg = BPF_REG_AX;
+
 				branch_target = prog;
 				/* Load old value */
 				emit_ldx(&prog, BPF_SIZE(insn->code),
@@ -1366,9 +1370,9 @@ st:			if (is_imm8(insn->off))
 				 * put the result in the AUX_REG.
 				 */
 				emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
-				maybe_emit_mod(&prog, AUX_REG, src_reg, is64);
+				maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64);
 				EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
-				      add_2reg(0xC0, AUX_REG, src_reg));
+				      add_2reg(0xC0, AUX_REG, real_src_reg));
 				/* Attempt to swap in new value */
 				err = emit_atomic(&prog, BPF_CMPXCHG,
 						  dst_reg, AUX_REG, insn->off,
@@ -1381,7 +1385,7 @@ st:			if (is_imm8(insn->off))
 				 */
 				EMIT2(X86_JNE, -(prog - branch_target) - 2);
 				/* Return the pre-modification value */
-				emit_mov_reg(&prog, is64, src_reg, BPF_REG_0);
+				emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0);
 				/* Restore R0 after clobbering RAX */
 				emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
 				break;
diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c
index 1bdc8e6684f7..fe4bb70eb9c5 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_and.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_and.c
@@ -75,3 +75,26 @@
 	},
 	.result = ACCEPT,
 },
+{
+	"BPF_ATOMIC_AND with fetch - r0 as source reg",
+	.insns = {
+		/* val = 0x110; */
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+		/* old = atomic_fetch_and(&val, 0x011); */
+		BPF_MOV64_IMM(BPF_REG_0, 0x011),
+		BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8),
+		/* if (old != 0x110) exit(3); */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 3),
+		BPF_EXIT_INSN(),
+		/* if (val != 0x010) exit(2); */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+		BPF_MOV64_IMM(BPF_REG_1, 2),
+		BPF_EXIT_INSN(),
+		/* exit(0); */
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+},

From 33ccec5fd740d0d5b78b77846f76eb5b4feb4327 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 17 Feb 2021 10:45:25 +0300
Subject: [PATCH 006/164] bpf: Fix a warning message in mark_ptr_not_null_reg()

The WARN_ON() argument is a condition, not an error message.  So this
code will print a stack trace but will not print the warning message.
Fix that and also change it to only WARN_ONCE().

Fixes: 4ddb74165ae5 ("bpf: Extract nullable reg type conversion into a helper function")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/YCzJlV3hnF%2Ft1Pk4@mwanda
---
 kernel/bpf/verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1dda9d81f12c..3d34ba492d46 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1120,7 +1120,7 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
 		reg->type = PTR_TO_RDWR_BUF;
 		break;
 	default:
-		WARN_ON("unknown nullable register type");
+		WARN_ONCE(1, "unknown nullable register type");
 	}
 }
 

From 53f523f3052ac16bbc7718032aa6b848f971d28c Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Wed, 17 Feb 2021 16:16:47 -0800
Subject: [PATCH 007/164] bpf: Clear percpu pointers in bpf_prog_clone_free()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Similar to bpf_prog_realloc(), bpf_prog_clone_create() also copies
the percpu pointers, but the clone still shares them with the original
prog, so we have to clear these two percpu pointers in
bpf_prog_clone_free(). Otherwise we would get a double free:

 BUG: kernel NULL pointer dereference, address: 0000000000000000
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 0 P4D 0
 Oops: 0000 [#1] SMP PTI
 CPU: 13 PID: 8140 Comm: kworker/13:247 Kdump: loaded Tainted: G                W    OE
  5.11.0-rc4.bm.1-amd64+ #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
 test_bpf: #1 TXA
 Workqueue: events bpf_prog_free_deferred
 RIP: 0010:percpu_ref_get_many.constprop.97+0x42/0xf0
 Code: [...]
 RSP: 0018:ffffa6bce1f9bda0 EFLAGS: 00010002
 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 00000000021dfc7b
 RDX: ffffffffae2eeb90 RSI: 867f92637e338da5 RDI: 0000000000000046
 RBP: ffffa6bce1f9bda8 R08: 0000000000000000 R09: 0000000000000001
 R10: 0000000000000046 R11: 0000000000000000 R12: 0000000000000280
 R13: 0000000000000000 R14: 0000000000000000 R15: ffff9b5f3ffdedc0
 FS:    0000000000000000(0000) GS:ffff9b5f2fb40000(0000) knlGS:0000000000000000
 CS:    0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 000000027c36c002 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
    refill_obj_stock+0x5e/0xd0
    free_percpu+0xee/0x550
    __bpf_prog_free+0x4d/0x60
    process_one_work+0x26a/0x590
    worker_thread+0x3c/0x390
    ? process_one_work+0x590/0x590
    kthread+0x130/0x150
    ? kthread_park+0x80/0x80
    ret_from_fork+0x1f/0x30

This bug is 100% reproducible with test_kmod.sh.

Fixes: 700d4796ef59 ("bpf: Optimize program stats")
Fixes: ca06f55b9002 ("bpf: Add per-program recursion prevention mechanism")
Reported-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210218001647.71631-1-xiyou.wangcong@gmail.com
---
 kernel/bpf/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 0ae015ad1e05..aa1e64196d8d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1118,6 +1118,8 @@ static void bpf_prog_clone_free(struct bpf_prog *fp)
 	 * clone is guaranteed to not be locked.
 	 */
 	fp->aux = NULL;
+	fp->stats = NULL;
+	fp->active = NULL;
 	__bpf_prog_free(fp);
 }
 

From 9aa940047ae81fa1806506556cde1efd0c39aef9 Mon Sep 17 00:00:00 2001
From: Sharvari Harisangam <sharvari.harisangam@nxp.com>
Date: Mon, 22 Feb 2021 18:19:44 +0530
Subject: [PATCH 008/164] MAINTAINERS: update for mwifiex driver maintainers

Add Sharvari Harisangam to Maintainer list.
Replace Ganapathi Bhat's email id in Maintainer list.

Signed-off-by: Rakesh Parmar <rakesh.parmar@nxp.com>
Signed-off-by: Sharvari Harisangam <sharvari.harisangam@nxp.com>
Acked-by: Ganapathi Bhat <ganapathi.bhat@nxp.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613998184-20047-1-git-send-email-sharvari.harisangam@nxp.com
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 99335fd22c0a..98fd98ebe6b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10685,7 +10685,8 @@ F:	drivers/net/ethernet/marvell/mvpp2/
 
 MARVELL MWIFIEX WIRELESS DRIVER
 M:	Amitkumar Karwar <amitkarwar@gmail.com>
-M:	Ganapathi Bhat <ganapathi.bhat@nxp.com>
+M:	Ganapathi Bhat <ganapathi017@gmail.com>
+M:	Sharvari Harisangam <sharvari.harisangam@nxp.com>
 M:	Xinming Hu <huxinming820@gmail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained

From ebb9d34e073dc965e9e1f0632a95dcb83736f166 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 21 Feb 2021 19:27:54 +0100
Subject: [PATCH 009/164] ath11k: qmi: use %pad to format dma_addr_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_ARCH_DMA_ADDR_T_64BIT=n:

    drivers/net/wireless/ath/ath11k/qmi.c: In function ‘ath11k_qmi_respond_fw_mem_request’:
    drivers/net/wireless/ath/ath11k/qmi.c:1690:8: warning: format ‘%llx’ expects argument of type ‘long long unsigned int’, but argument 5 has type ‘dma_addr_t’ {aka ‘unsigned int’} [-Wformat=]
     1690 |        "qmi req mem_seg[%d] 0x%llx %u %u\n", i,
	  |        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     1691 |         ab->qmi.target_mem[i].paddr,
	  |         ~~~~~~~~~~~~~~~~~~~~~~~~~~~
	  |                              |
	  |                              dma_addr_t {aka unsigned int}
    drivers/net/wireless/ath/ath11k/debug.h:64:30: note: in definition of macro ‘ath11k_dbg’
       64 |   __ath11k_dbg(ar, dbg_mask, fmt, ##__VA_ARGS__); \
	  |                              ^~~
    drivers/net/wireless/ath/ath11k/qmi.c:1690:34: note: format string is defined here
     1690 |        "qmi req mem_seg[%d] 0x%llx %u %u\n", i,
	  |                               ~~~^
	  |                                  |
	  |                                  long long unsigned int
	  |                               %x

Fixes: d5395a5486596308 ("ath11k: qmi: add debug message for allocated memory segment addresses and sizes")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210221182754.2071863-1-geert@linux-m68k.org
---
 drivers/net/wireless/ath/ath11k/qmi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index 1aca841cd147..7968fe4eda22 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -1687,8 +1687,8 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 			req->mem_seg[i].size = ab->qmi.target_mem[i].size;
 			req->mem_seg[i].type = ab->qmi.target_mem[i].type;
 			ath11k_dbg(ab, ATH11K_DBG_QMI,
-				   "qmi req mem_seg[%d] 0x%llx %u %u\n", i,
-				    ab->qmi.target_mem[i].paddr,
+				   "qmi req mem_seg[%d] %pad %u %u\n", i,
+				    &ab->qmi.target_mem[i].paddr,
 				    ab->qmi.target_mem[i].size,
 				    ab->qmi.target_mem[i].type);
 		}

From 77d7e87128d4dfb400df4208b2812160e999c165 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Mon, 22 Feb 2021 17:14:09 +0200
Subject: [PATCH 010/164] ath11k: fix AP mode for QCA6390

Commit c134d1f8c436 ("ath11k: Handle errors if peer creation fails") completely
broke AP mode on QCA6390:

kernel: [  151.230734] ath11k_pci 0000:06:00.0: failed to create peer after vdev start delay: -22
wpa_supplicant[2307]: Failed to set beacon parameters
wpa_supplicant[2307]: Interface initialization failed
wpa_supplicant[2307]: wlan0: interface state UNINITIALIZED->DISABLED
wpa_supplicant[2307]: wlan0: AP-DISABLED
wpa_supplicant[2307]: wlan0: Unable to setup interface.
wpa_supplicant[2307]: Failed to initialize AP interface

This was because commit c134d1f8c436 ("ath11k: Handle errors if peer creation
fails") added error handling for ath11k_peer_create(), which had been failing
all along but was unnoticed due to the missing error handling. The actual bug
was introduced already in commit aa44b2f3ecd4 ("ath11k: start vdev if a bss peer is
already created").

ath11k_peer_create() was failing because for AP mode the peer is created
already earlier op_add_interface() and we should skip creation here, but the
check for modes was wrong.  Fixing that makes AP mode work again.

This shouldn't affect IPQ8074 nor QCN9074 as they have hw_params.vdev_start_delay disabled.

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Fixes: c134d1f8c436 ("ath11k: Handle errors if peer creation fails")
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1614006849-25764-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index b391169576e2..faa2e678e63e 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -5450,8 +5450,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
 	}
 
 	if (ab->hw_params.vdev_start_delay &&
-	    (arvif->vdev_type == WMI_VDEV_TYPE_AP ||
-	    arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) {
+	    arvif->vdev_type != WMI_VDEV_TYPE_AP &&
+	    arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) {
 		param.vdev_id = arvif->vdev_id;
 		param.peer_type = WMI_PEER_TYPE_DEFAULT;
 		param.peer_addr = ar->mac_addr;

From f4eda8b6e4a5c7897c6bb992ed63a27061b371ef Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Tue, 23 Feb 2021 13:04:16 +0400
Subject: [PATCH 011/164] bpf: Drop imprecise log message

Now it is possible for global function to have a pointer argument that
points to something different than struct. Drop the irrelevant log
message and keep the logic same.

Fixes: e5069b9c23b3 ("bpf: Support pointers in global func args")
Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210223090416.333943-1-me@ubique.spb.ru
---
 kernel/bpf/btf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2efeb5f4b343..b1a76fe046cb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4321,8 +4321,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
 		 * is not supported yet.
 		 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
 		 */
-		if (log->level & BPF_LOG_LEVEL)
-			bpf_log(log, "arg#%d type is not a struct\n", arg);
 		return NULL;
 	}
 	tname = btf_name_by_offset(btf, t->name_off);

From c41d81bfbb4579c3e583457e383dd63d026bf947 Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Tue, 23 Feb 2021 12:22:11 +0400
Subject: [PATCH 012/164] selftests/bpf: Fix a compiler warning in global func
 test

Add an explicit 'const void *' cast to pass program ctx pointer type into
a global function that expects pointer to structure.

warning: incompatible pointer types
passing 'struct __sk_buff *' to parameter of type 'const struct S *'
[-Wincompatible-pointer-types]
        return foo(skb);
                   ^~~
progs/test_global_func11.c:10:36: note: passing argument to parameter 's' here
__noinline int foo(const struct S *s)
                                   ^

Fixes: 8b08807d039a ("selftests/bpf: Add unit tests for pointers in global functions")
Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210223082211.302596-1-me@ubique.spb.ru
---
 tools/testing/selftests/bpf/progs/test_global_func11.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c
index 28488047c849..ef5277d982d9 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func11.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func11.c
@@ -15,5 +15,5 @@ __noinline int foo(const struct S *s)
 SEC("cgroup_skb/ingress")
 int test_cls(struct __sk_buff *skb)
 {
-	return foo(skb);
+	return foo((const void *)skb);
 }

From 41462c6e730ca0e63f5fed5a517052385d980c54 Mon Sep 17 00:00:00 2001
From: Kun-Chuan Hsieh <jetswayss@gmail.com>
Date: Wed, 24 Feb 2021 05:27:52 +0000
Subject: [PATCH 013/164] tools/resolve_btfids: Fix build error with older host
 toolchains

Older libelf.h and glibc elf.h might not yet define the ELF compression
types.

Checking and defining SHF_COMPRESSED fix the build error when compiling
with older toolchains. Also, the tool resolve_btfids is compiled with host
toolchain. The host toolchain is more likely to be older than the cross
compile toolchain.

Fixes: 51f6463aacfb ("tools/resolve_btfids: Fix sections with wrong alignment")
Signed-off-by: Kun-Chuan Hsieh <jetswayss@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20210224052752.5284-1-jetswayss@gmail.com
---
 tools/bpf/resolve_btfids/main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 7409d7860aa6..80d966cfcaa1 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -260,6 +260,11 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
 	return btf_id__add(root, id, false);
 }
 
+/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */
+#ifndef SHF_COMPRESSED
+#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
+#endif
+
 /*
  * The data of compressed section should be aligned to 4
  * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld

From a7c9c25a99bbdaff51da26b874d2faaa8fdd72b5 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 23 Feb 2021 21:14:57 +0800
Subject: [PATCH 014/164] bpf: Remove blank line in bpf helper description
 comment

Commit 34b2021cc616 ("bpf: Add BPF-helper for MTU checking") added an extra
blank line in bpf helper description. This will make bpf_helpers_doc.py stop
building bpf_helper_defs.h immediately after bpf_check_mtu(), which will
affect future added functions.

Fixes: 34b2021cc616 ("bpf: Add BPF-helper for MTU checking")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20210223131457.1378978-1-liuhangbin@gmail.com
---
 include/uapi/linux/bpf.h       | 1 -
 tools/include/uapi/linux/bpf.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4c24daa43bac..79c893310492 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3850,7 +3850,6 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *	Description
-
  *		Check ctx packet size against exceeding MTU of net device (based
  *		on *ifindex*).  This helper will likely be used in combination
  *		with helpers that adjust/change the packet size.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c24daa43bac..79c893310492 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3850,7 +3850,6 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *	Description
-
  *		Check ctx packet size against exceeding MTU of net device (based
  *		on *ifindex*).  This helper will likely be used in combination
  *		with helpers that adjust/change the packet size.

From 557c223b643a35effec9654958d8edc62fd2603a Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 24 Feb 2021 16:14:03 +0800
Subject: [PATCH 015/164] selftests/bpf: No need to drop the packet when there
 is no geneve opt

In bpf geneve tunnel test we set geneve option on tx side. On rx side we
only call bpf_skb_get_tunnel_opt(). Since commit 9c2e14b48119 ("ip_tunnels:
Set tunnel option flag when tunnel metadata is present") geneve_rx() will
not add TUNNEL_GENEVE_OPT flag if there is no geneve option, which cause
bpf_skb_get_tunnel_opt() return ENOENT and _geneve_get_tunnel() in
test_tunnel_kern.c drop the packet.

As it should be valid that bpf_skb_get_tunnel_opt() return error when
there is not tunnel option, there is no need to drop the packet and
break all geneve rx traffic. Just set opt_class to 0 in this test and
keep returning TC_ACT_OK.

Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: William Tu <u9012063@gmail.com>
Link: https://lore.kernel.org/bpf/20210224081403.1425474-1-liuhangbin@gmail.com
---
 tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index a621b58ab079..9afe947cfae9 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -446,10 +446,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
-	if (ret < 0) {
-		ERROR(ret);
-		return TC_ACT_SHOT;
-	}
+	if (ret < 0)
+		gopt.opt_class = 0;
 
 	bpf_trace_printk(fmt, sizeof(fmt),
 			key.tunnel_id, key.remote_ipv4, gopt.opt_class);

From 62541e266703549550e77fd46138422dbdc881f1 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Thu, 25 Feb 2021 09:04:21 +0200
Subject: [PATCH 016/164] iwlwifi: pcie: fix iwl_so_trans_cfg link error when
 CONFIG_IWLMVM is disabled

Randy reported an error on his randconfig builds:

ERROR: modpost: "iwl_so_trans_cfg" [drivers/net/wireless/intel/iwlwifi/iwlwifi.ko] undefined!

The problem was that when CONFIG_IWLMVM was disabled we were still accessing
iwl_so_trans_cfg. Fix it by moving IS_ENABLED() check before the access.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Fixes: 930be4e76f26 ("iwlwifi: add support for SnJ with Jf devices")
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Acked-by: Luca Coelho <luciano.coelho@intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1614236661-20274-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 314fec4a89ad..ffaf973dae94 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1106,6 +1106,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		}
 	}
 
+#if IS_ENABLED(CONFIG_IWLMVM)
+
 	/*
 	 * Workaround for problematic SnJ device: sometimes when
 	 * certain RF modules are connected to SnJ, the device ID
@@ -1116,7 +1118,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (CSR_HW_REV_TYPE(iwl_trans->hw_rev) == IWL_CFG_MAC_TYPE_SNJ)
 		iwl_trans->trans_cfg = &iwl_so_trans_cfg;
 
-#if IS_ENABLED(CONFIG_IWLMVM)
 	/*
 	 * special-case 7265D, it has the same PCI IDs.
 	 *

From fb5fabb192b22293b70bc3351696473c50746d90 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 25 Feb 2021 15:59:15 +0100
Subject: [PATCH 017/164] mt76: mt7921: remove incorrect error handling

Clang points out a mistake in the error handling in
mt7921_mcu_tx_rate_report(), which tries to dereference a pointer that
cannot be initialized because of the error that is being handled:

drivers/net/wireless/mediatek/mt76/mt7921/mcu.c:409:3: warning: variable 'stats' is uninitialized when used here [-Wuninitialized]
                stats->tx_rate = rate;
                ^~~~~
drivers/net/wireless/mediatek/mt76/mt7921/mcu.c:401:32: note: initialize the variable 'stats' to silence this warning
        struct mt7921_sta_stats *stats;
                                      ^
Just remove the obviously incorrect line.

Fixes: 1c099ab44727 ("mt76: mt7921: add MCU support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210225145953.404859-2-arnd@kernel.org
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index db125cd22b91..b5cc72e7e81c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -405,10 +405,8 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
 	if (wlan_idx >= MT76_N_WCIDS)
 		return;
 	wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
-	if (!wcid) {
-		stats->tx_rate = rate;
+	if (!wcid)
 		return;
-	}
 
 	msta = container_of(wcid, struct mt7921_sta, wcid);
 	stats = &msta->stats;

From d0bd52c591a1070c54dc428e926660eb4f981099 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Feb 2021 12:48:31 +0100
Subject: [PATCH 018/164] mt76: dma: do not report truncated frames to mac80211

Commit b102f0c522cf6 ("mt76: fix array overflow on receiving too many
fragments for a packet") fixes a possible OOB access but it introduces a
memory leak since the pending frame is not released to page_frag_cache
if the frag array of skb_shared_info is full. Commit 93a1d4791c10
("mt76: dma: fix a possible memory leak in mt76_add_fragment()") fixes
the issue but does not free the truncated skb that is forwarded to
mac80211 layer. Fix the leftover issue discarding even truncated skbs.

Fixes: 93a1d4791c10 ("mt76: dma: fix a possible memory leak in mt76_add_fragment()")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/a03166fcc8214644333c68674a781836e0f57576.1612697217.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/dma.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index abdc8d364361..2f27c43ad76d 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -512,13 +512,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
 {
 	struct sk_buff *skb = q->rx_head;
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	int nr_frags = shinfo->nr_frags;
 
-	if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) {
+	if (nr_frags < ARRAY_SIZE(shinfo->frags)) {
 		struct page *page = virt_to_head_page(data);
 		int offset = data - page_address(page) + q->buf_offset;
 
-		skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len,
-				q->buf_size);
+		skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size);
 	} else {
 		skb_free_frag(data);
 	}
@@ -527,7 +527,10 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
 		return;
 
 	q->rx_head = NULL;
-	dev->drv->rx_skb(dev, q - dev->q_rx, skb);
+	if (nr_frags < ARRAY_SIZE(shinfo->frags))
+		dev->drv->rx_skb(dev, q - dev->q_rx, skb);
+	else
+		dev_kfree_skb(skb);
 }
 
 static int

From c490492f15f656340b35cb9e36b9bfdea3539e19 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 26 Feb 2021 15:21:27 +0100
Subject: [PATCH 019/164] mt76: mt7915: fix unused 'mode' variable

clang points out a possible corner case in the mt7915_tm_set_tx_cont()
function if called with invalid arguments:

drivers/net/wireless/mediatek/mt76/mt7915/testmode.c:593:2: warning: variable 'mode' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized]
        default:
        ^~~~~~~
drivers/net/wireless/mediatek/mt76/mt7915/testmode.c:597:13: note: uninitialized use occurs here
        rateval =  mode << 6 | rate_idx;
                   ^~~~
drivers/net/wireless/mediatek/mt76/mt7915/testmode.c:506:37: note: initialize the variable 'mode' to silence this warning
        u8 rate_idx = td->tx_rate_idx, mode;
                                           ^

Change it to return an error instead of continuing with invalid data
here.

Fixes: 3f0caa3cbf94 ("mt76: mt7915: add support for continuous tx in testmode")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt7915/testmode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
index 7fb2170a9561..bd798df748ba 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
@@ -543,7 +543,7 @@ mt7915_tm_set_tx_cont(struct mt7915_phy *phy, bool en)
 		tx_cont->bw = CMD_CBW_20MHZ;
 		break;
 	default:
-		break;
+		return -EINVAL;
 	}
 
 	if (!en) {
@@ -591,7 +591,7 @@ mt7915_tm_set_tx_cont(struct mt7915_phy *phy, bool en)
 		mode = MT_PHY_TYPE_HE_MU;
 		break;
 	default:
-		break;
+		return -EINVAL;
 	}
 
 	rateval =  mode << 6 | rate_idx;

From a3e860a83397bf761ec1128a3f0ba186445992c6 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 25 Feb 2021 17:01:10 +0800
Subject: [PATCH 020/164] net: stmmac: stop each tx channel independently

If clear GMAC_CONFIG_TE bit, it would stop all tx channels, but users
may only want to stop specific tx channel.

Fixes: 48863ce5940f ("stmmac: add DMA support for GMAC 4.xx")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 0b4ee2dbb691..71e50751ef2d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -53,10 +53,6 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 
 	value &= ~DMA_CONTROL_ST;
 	writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
-
-	value = readl(ioaddr + GMAC_CONFIG);
-	value &= ~GMAC_CONFIG_TE;
-	writel(value, ioaddr + GMAC_CONFIG);
 }
 
 void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan)

From c511819d138de38e1637eedb645c207e09680d0f Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 25 Feb 2021 17:01:11 +0800
Subject: [PATCH 021/164] net: stmmac: fix watchdog timeout during
 suspend/resume stress test

stmmac_xmit() call stmmac_tx_timer_arm() at the end to modify tx timer to
do the transmission cleanup work. Imagine such a situation, stmmac enters
suspend immediately after tx timer modified, it's expire callback
stmmac_tx_clean() would not be invoked. This could affect BQL, since
netdev_tx_sent_queue() has been called, but netdev_tx_completed_queue()
have not been involved, as a result, dql_avail(&dev_queue->dql) finally
always return a negative value.

__dev_queue_xmit->__dev_xmit_skb->qdisc_run->__qdisc_run->qdisc_restart->dequeue_skb:
	if ((q->flags & TCQ_F_ONETXQUEUE) &&
		netif_xmit_frozen_or_stopped(txq)) // __QUEUE_STATE_STACK_XOFF is set

Net core will stop transmitting any more. Finillay, net watchdong would timeout.
To fix this issue, we should call netdev_tx_reset_queue() in stmmac_resume().

Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 26b971cd4da5..12ed337a239b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5257,6 +5257,8 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
 		tx_q->cur_tx = 0;
 		tx_q->dirty_tx = 0;
 		tx_q->mss = 0;
+
+		netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 }
 

From bfaf91ca848e758ed7be99b61fd936d03819fa56 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 25 Feb 2021 17:01:12 +0800
Subject: [PATCH 022/164] net: stmmac: fix dma physical address of descriptor
 when display ring

Driver uses dma_alloc_coherent to allocate dma memory for descriptors,
dma_alloc_coherent will return both the virtual address and physical
address. AFAIK, virt_to_phys could not convert virtual address to
physical address, for which memory is allocated by dma_alloc_coherent.

dwmac4_display_ring() function is broken for various descriptor, it only
support normal descriptor(struct dma_desc) now, this patch also extends to
support all descriptor types.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/stmicro/stmmac/dwmac4_descs.c    | 50 +++++++++++++---
 .../net/ethernet/stmicro/stmmac/enh_desc.c    |  9 ++-
 drivers/net/ethernet/stmicro/stmmac/hwif.h    |  3 +-
 .../net/ethernet/stmicro/stmmac/norm_desc.c   |  9 ++-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 57 ++++++++++++-------
 5 files changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index c6540b003b43..ee87811b0ca5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -402,19 +402,53 @@ static void dwmac4_rd_set_tx_ic(struct dma_desc *p)
 	p->des2 |= cpu_to_le32(TDES2_INTERRUPT_ON_COMPLETION);
 }
 
-static void dwmac4_display_ring(void *head, unsigned int size, bool rx)
+static void dwmac4_display_ring(void *head, unsigned int size, bool rx,
+				dma_addr_t dma_rx_phy, unsigned int desc_size)
 {
-	struct dma_desc *p = (struct dma_desc *)head;
+	dma_addr_t dma_addr;
 	int i;
 
 	pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
 
-	for (i = 0; i < size; i++) {
-		pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-			i, (unsigned int)virt_to_phys(p),
-			le32_to_cpu(p->des0), le32_to_cpu(p->des1),
-			le32_to_cpu(p->des2), le32_to_cpu(p->des3));
-		p++;
+	if (desc_size == sizeof(struct dma_desc)) {
+		struct dma_desc *p = (struct dma_desc *)head;
+
+		for (i = 0; i < size; i++) {
+			dma_addr = dma_rx_phy + i * sizeof(*p);
+			pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+				i, &dma_addr,
+				le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+				le32_to_cpu(p->des2), le32_to_cpu(p->des3));
+			p++;
+		}
+	} else if (desc_size == sizeof(struct dma_extended_desc)) {
+		struct dma_extended_desc *extp = (struct dma_extended_desc *)head;
+
+		for (i = 0; i < size; i++) {
+			dma_addr = dma_rx_phy + i * sizeof(*extp);
+			pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+				i, &dma_addr,
+				le32_to_cpu(extp->basic.des0), le32_to_cpu(extp->basic.des1),
+				le32_to_cpu(extp->basic.des2), le32_to_cpu(extp->basic.des3),
+				le32_to_cpu(extp->des4), le32_to_cpu(extp->des5),
+				le32_to_cpu(extp->des6), le32_to_cpu(extp->des7));
+			extp++;
+		}
+	} else if (desc_size == sizeof(struct dma_edesc)) {
+		struct dma_edesc *ep = (struct dma_edesc *)head;
+
+		for (i = 0; i < size; i++) {
+			dma_addr = dma_rx_phy + i * sizeof(*ep);
+			pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+				i, &dma_addr,
+				le32_to_cpu(ep->des4), le32_to_cpu(ep->des5),
+				le32_to_cpu(ep->des6), le32_to_cpu(ep->des7),
+				le32_to_cpu(ep->basic.des0), le32_to_cpu(ep->basic.des1),
+				le32_to_cpu(ep->basic.des2), le32_to_cpu(ep->basic.des3));
+			ep++;
+		}
+	} else {
+		pr_err("unsupported descriptor!");
 	}
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index d02cec296f51..6650edfab5bc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -417,19 +417,22 @@ static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
 	}
 }
 
-static void enh_desc_display_ring(void *head, unsigned int size, bool rx)
+static void enh_desc_display_ring(void *head, unsigned int size, bool rx,
+				  dma_addr_t dma_rx_phy, unsigned int desc_size)
 {
 	struct dma_extended_desc *ep = (struct dma_extended_desc *)head;
+	dma_addr_t dma_addr;
 	int i;
 
 	pr_info("Extended %s descriptor ring:\n", rx ? "RX" : "TX");
 
 	for (i = 0; i < size; i++) {
 		u64 x;
+		dma_addr = dma_rx_phy + i * sizeof(*ep);
 
 		x = *(u64 *)ep;
-		pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-			i, (unsigned int)virt_to_phys(ep),
+		pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+			i, &dma_addr,
 			(unsigned int)x, (unsigned int)(x >> 32),
 			ep->basic.des2, ep->basic.des3);
 		ep++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index b40b2e0667bb..7417db31402f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -78,7 +78,8 @@ struct stmmac_desc_ops {
 	/* get rx timestamp status */
 	int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
 	/* Display ring */
-	void (*display_ring)(void *head, unsigned int size, bool rx);
+	void (*display_ring)(void *head, unsigned int size, bool rx,
+			     dma_addr_t dma_rx_phy, unsigned int desc_size);
 	/* set MSS via context descriptor */
 	void (*set_mss)(struct dma_desc *p, unsigned int mss);
 	/* get descriptor skbuff address */
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f083360e4ba6..98ef43f35802 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -269,19 +269,22 @@ static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
 		return 1;
 }
 
-static void ndesc_display_ring(void *head, unsigned int size, bool rx)
+static void ndesc_display_ring(void *head, unsigned int size, bool rx,
+			       dma_addr_t dma_rx_phy, unsigned int desc_size)
 {
 	struct dma_desc *p = (struct dma_desc *)head;
+	dma_addr_t dma_addr;
 	int i;
 
 	pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
 
 	for (i = 0; i < size; i++) {
 		u64 x;
+		dma_addr = dma_rx_phy + i * sizeof(*p);
 
 		x = *(u64 *)p;
-		pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x",
-			i, (unsigned int)virt_to_phys(p),
+		pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x",
+			i, &dma_addr,
 			(unsigned int)x, (unsigned int)(x >> 32),
 			p->des2, p->des3);
 		p++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 12ed337a239b..730f2d71578c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1133,6 +1133,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 {
 	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	unsigned int desc_size;
 	void *head_rx;
 	u32 queue;
 
@@ -1142,19 +1143,24 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 
 		pr_info("\tRX Queue %u rings\n", queue);
 
-		if (priv->extend_desc)
+		if (priv->extend_desc) {
 			head_rx = (void *)rx_q->dma_erx;
-		else
+			desc_size = sizeof(struct dma_extended_desc);
+		} else {
 			head_rx = (void *)rx_q->dma_rx;
+			desc_size = sizeof(struct dma_desc);
+		}
 
 		/* Display RX ring */
-		stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true);
+		stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true,
+				    rx_q->dma_rx_phy, desc_size);
 	}
 }
 
 static void stmmac_display_tx_rings(struct stmmac_priv *priv)
 {
 	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	unsigned int desc_size;
 	void *head_tx;
 	u32 queue;
 
@@ -1164,14 +1170,19 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv)
 
 		pr_info("\tTX Queue %d rings\n", queue);
 
-		if (priv->extend_desc)
+		if (priv->extend_desc) {
 			head_tx = (void *)tx_q->dma_etx;
-		else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+			desc_size = sizeof(struct dma_extended_desc);
+		} else if (tx_q->tbs & STMMAC_TBS_AVAIL) {
 			head_tx = (void *)tx_q->dma_entx;
-		else
+			desc_size = sizeof(struct dma_edesc);
+		} else {
 			head_tx = (void *)tx_q->dma_tx;
+			desc_size = sizeof(struct dma_desc);
+		}
 
-		stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false);
+		stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false,
+				    tx_q->dma_tx_phy, desc_size);
 	}
 }
 
@@ -3736,18 +3747,23 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 	unsigned int count = 0, error = 0, len = 0;
 	int status = 0, coe = priv->hw->rx_csum;
 	unsigned int next_entry = rx_q->cur_rx;
+	unsigned int desc_size;
 	struct sk_buff *skb = NULL;
 
 	if (netif_msg_rx_status(priv)) {
 		void *rx_head;
 
 		netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
-		if (priv->extend_desc)
+		if (priv->extend_desc) {
 			rx_head = (void *)rx_q->dma_erx;
-		else
+			desc_size = sizeof(struct dma_extended_desc);
+		} else {
 			rx_head = (void *)rx_q->dma_rx;
+			desc_size = sizeof(struct dma_desc);
+		}
 
-		stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true);
+		stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true,
+				    rx_q->dma_rx_phy, desc_size);
 	}
 	while (count < limit) {
 		unsigned int buf1_len = 0, buf2_len = 0;
@@ -4315,24 +4331,27 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 static struct dentry *stmmac_fs_dir;
 
 static void sysfs_display_ring(void *head, int size, int extend_desc,
-			       struct seq_file *seq)
+			       struct seq_file *seq, dma_addr_t dma_phy_addr)
 {
 	int i;
 	struct dma_extended_desc *ep = (struct dma_extended_desc *)head;
 	struct dma_desc *p = (struct dma_desc *)head;
+	dma_addr_t dma_addr;
 
 	for (i = 0; i < size; i++) {
 		if (extend_desc) {
-			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-				   i, (unsigned int)virt_to_phys(ep),
+			dma_addr = dma_phy_addr + i * sizeof(*ep);
+			seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+				   i, &dma_addr,
 				   le32_to_cpu(ep->basic.des0),
 				   le32_to_cpu(ep->basic.des1),
 				   le32_to_cpu(ep->basic.des2),
 				   le32_to_cpu(ep->basic.des3));
 			ep++;
 		} else {
-			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-				   i, (unsigned int)virt_to_phys(p),
+			dma_addr = dma_phy_addr + i * sizeof(*p);
+			seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+				   i, &dma_addr,
 				   le32_to_cpu(p->des0), le32_to_cpu(p->des1),
 				   le32_to_cpu(p->des2), le32_to_cpu(p->des3));
 			p++;
@@ -4360,11 +4379,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v)
 		if (priv->extend_desc) {
 			seq_printf(seq, "Extended descriptor ring:\n");
 			sysfs_display_ring((void *)rx_q->dma_erx,
-					   priv->dma_rx_size, 1, seq);
+					   priv->dma_rx_size, 1, seq, rx_q->dma_rx_phy);
 		} else {
 			seq_printf(seq, "Descriptor ring:\n");
 			sysfs_display_ring((void *)rx_q->dma_rx,
-					   priv->dma_rx_size, 0, seq);
+					   priv->dma_rx_size, 0, seq, rx_q->dma_rx_phy);
 		}
 	}
 
@@ -4376,11 +4395,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v)
 		if (priv->extend_desc) {
 			seq_printf(seq, "Extended descriptor ring:\n");
 			sysfs_display_ring((void *)tx_q->dma_etx,
-					   priv->dma_tx_size, 1, seq);
+					   priv->dma_tx_size, 1, seq, tx_q->dma_tx_phy);
 		} else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) {
 			seq_printf(seq, "Descriptor ring:\n");
 			sysfs_display_ring((void *)tx_q->dma_tx,
-					   priv->dma_tx_size, 0, seq);
+					   priv->dma_tx_size, 0, seq, tx_q->dma_tx_phy);
 		}
 	}
 

From 396e13e11577b614db77db0bbb6fca935b94eb1b Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 25 Feb 2021 17:01:13 +0800
Subject: [PATCH 023/164] net: stmmac: fix wrongly set buffer2 valid when sph
 unsupport

In current driver, buffer2 available only when hardware supports split
header. Wrongly set buffer2 valid in stmmac_rx_refill when refill buffer
address. You can see that desc3 is 0x81000000 after initialization, but
turn out to be 0x83000000 after refill.

Fixes: 67afd6d1cfdf ("net: stmmac: Add Split Header support and enable it in XGMAC cores")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c   | 9 +++++++--
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 2 +-
 drivers/net/ethernet/stmicro/stmmac/hwif.h           | 2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c    | 8 ++++++--
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index ee87811b0ca5..cbf4429fb1d2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -533,10 +533,15 @@ static void dwmac4_get_rx_header_len(struct dma_desc *p, unsigned int *len)
 	*len = le32_to_cpu(p->des2) & RDES2_HL;
 }
 
-static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
+static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool buf2_valid)
 {
 	p->des2 = cpu_to_le32(lower_32_bits(addr));
-	p->des3 = cpu_to_le32(upper_32_bits(addr) | RDES3_BUFFER2_VALID_ADDR);
+	p->des3 = cpu_to_le32(upper_32_bits(addr));
+
+	if (buf2_valid)
+		p->des3 |= cpu_to_le32(RDES3_BUFFER2_VALID_ADDR);
+	else
+		p->des3 &= cpu_to_le32(~RDES3_BUFFER2_VALID_ADDR);
 }
 
 static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 0aaf19ab5672..ccfb0102dde4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -292,7 +292,7 @@ static void dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len)
 		*len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL;
 }
 
-static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
+static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool is_valid)
 {
 	p->des2 = cpu_to_le32(lower_32_bits(addr));
 	p->des3 = cpu_to_le32(upper_32_bits(addr));
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 7417db31402f..979ac9fca23c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -92,7 +92,7 @@ struct stmmac_desc_ops {
 	int (*get_rx_hash)(struct dma_desc *p, u32 *hash,
 			   enum pkt_hash_types *type);
 	void (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
-	void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
+	void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr, bool buf2_valid);
 	void (*set_sarc)(struct dma_desc *p, u32 sarc_type);
 	void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag,
 			     u32 inner_type);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 730f2d71578c..cd7709da0969 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1314,9 +1314,10 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 			return -ENOMEM;
 
 		buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
-		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
 	} else {
 		buf->sec_page = NULL;
+		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
 	}
 
 	buf->addr = page_pool_get_dma_addr(buf->page);
@@ -3659,7 +3660,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 					   DMA_FROM_DEVICE);
 
 		stmmac_set_desc_addr(priv, p, buf->addr);
-		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+		if (priv->sph)
+			stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
+		else
+			stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
 		stmmac_refill_desc3(priv, rx_q, p);
 
 		rx_q->rx_count_frames++;

From 9c63faaa931e443e7abbbee9de0169f1d4710546 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 25 Feb 2021 17:01:14 +0800
Subject: [PATCH 024/164] net: stmmac: re-init rx buffers when mac resume back

During suspend/resume stress test, we found descriptor write back by DMA
could exhibit unusual behavior, e.g.:
	003 [0xc4310030]: 0x0 0x40 0x0 0xb5010040

We can see that desc3 write back is 0xb5010040, it is still ownd by DMA,
so application would not recycle this buffer. It will trigger fatal bus
error when DMA try to use this descriptor again. To fix this issue, we
should re-init all rx buffers when mac resume back.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 84 ++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cd7709da0969..0eba44e9c1f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1379,6 +1379,88 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 	}
 }
 
+/**
+ * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer.
+ * @priv: driver private structure
+ * Description: this function is called to re-allocate a receive buffer, perform
+ * the DMA mapping and init the descriptor.
+ */
+static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
+{
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	u32 queue;
+	int i;
+
+	for (queue = 0; queue < rx_count; queue++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		for (i = 0; i < priv->dma_rx_size; i++) {
+			struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+
+			if (buf->page) {
+				page_pool_recycle_direct(rx_q->page_pool, buf->page);
+				buf->page = NULL;
+			}
+
+			if (priv->sph && buf->sec_page) {
+				page_pool_recycle_direct(rx_q->page_pool, buf->sec_page);
+				buf->sec_page = NULL;
+			}
+		}
+	}
+
+	for (queue = 0; queue < rx_count; queue++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		for (i = 0; i < priv->dma_rx_size; i++) {
+			struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+			struct dma_desc *p;
+
+			if (priv->extend_desc)
+				p = &((rx_q->dma_erx + i)->basic);
+			else
+				p = rx_q->dma_rx + i;
+
+			if (!buf->page) {
+				buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
+				if (!buf->page)
+					goto err_reinit_rx_buffers;
+
+				buf->addr = page_pool_get_dma_addr(buf->page);
+			}
+
+			if (priv->sph && !buf->sec_page) {
+				buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+				if (!buf->sec_page)
+					goto err_reinit_rx_buffers;
+
+				buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+			}
+
+			stmmac_set_desc_addr(priv, p, buf->addr);
+			if (priv->sph)
+				stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
+			else
+				stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
+			if (priv->dma_buf_sz == BUF_SIZE_16KiB)
+				stmmac_init_desc3(priv, p);
+		}
+	}
+
+	return;
+
+err_reinit_rx_buffers:
+	do {
+		while (--i >= 0)
+			stmmac_free_rx_buffer(priv, queue, i);
+
+		if (queue == 0)
+			break;
+
+		i = priv->dma_rx_size;
+	} while (queue-- > 0);
+}
+
 /**
  * init_dma_rx_desc_rings - init the RX descriptor rings
  * @dev: net device structure
@@ -5343,7 +5425,7 @@ int stmmac_resume(struct device *dev)
 	mutex_lock(&priv->lock);
 
 	stmmac_reset_queues_param(priv);
-
+	stmmac_reinit_rx_buffers(priv);
 	stmmac_free_tx_skbufs(priv);
 	stmmac_clear_descriptors(priv);
 

From 907310ceb27ee4259bedb6c1257f5d05ee44f3ce Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 25 Feb 2021 15:38:31 +0100
Subject: [PATCH 025/164] net: mscc: ocelot: select NET_DEVLINK

Without this option, the driver fails to link:

ld.lld: error: undefined symbol: devlink_sb_register
>>> referenced by ocelot_devlink.c
>>>               net/ethernet/mscc/ocelot_devlink.o:(ocelot_devlink_sb_register) in archive drivers/built-in.a
>>> referenced by ocelot_devlink.c
>>>               net/ethernet/mscc/ocelot_devlink.o:(ocelot_devlink_sb_register) in archive drivers/built-in.a

Fixes: f59fd9cab730 ("net: mscc: ocelot: configure watermarks using devlink-sb")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210225143910.3964364-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mscc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
index c0ede0ca7115..05cb040c2677 100644
--- a/drivers/net/ethernet/mscc/Kconfig
+++ b/drivers/net/ethernet/mscc/Kconfig
@@ -13,6 +13,7 @@ if NET_VENDOR_MICROSEMI
 
 # Users should depend on NET_SWITCHDEV, HAS_IOMEM
 config MSCC_OCELOT_SWITCH_LIB
+	select NET_DEVLINK
 	select REGMAP_MMIO
 	select PACKING
 	select PHYLIB

From 01c2c1ad8f45e0c191bfd961dc41cd77df0d7a2f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 25 Feb 2021 15:38:32 +0100
Subject: [PATCH 026/164] net: dsa: tag_ocelot_8021q: fix driver dependency

When the ocelot driver code is in a library, the dsa tag
code cannot be built-in:

ld.lld: error: undefined symbol: ocelot_can_inject
>>> referenced by tag_ocelot_8021q.c
>>>               dsa/tag_ocelot_8021q.o:(ocelot_xmit) in archive net/built-in.a

ld.lld: error: undefined symbol: ocelot_port_inject_frame
>>> referenced by tag_ocelot_8021q.c
>>>               dsa/tag_ocelot_8021q.o:(ocelot_xmit) in archive net/built-in.a

Building the tag support only really makes sense for compile-testing
when the driver is available, so add a Kconfig dependency that prevents
the broken configuration while allowing COMPILE_TEST alternative when
MSCC_OCELOT_SWITCH_LIB is disabled entirely.  This case is handled
through the #ifdef check in include/soc/mscc/ocelot.h.

Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210225143910.3964364-2-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/dsa/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 3589224c8da9..58b8fc82cd3c 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -118,6 +118,8 @@ config NET_DSA_TAG_OCELOT
 
 config NET_DSA_TAG_OCELOT_8021Q
 	tristate "Tag driver for Ocelot family of switches, using VLAN"
+	depends on MSCC_OCELOT_SWITCH_LIB || \
+	          (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
 	select NET_DSA_TAG_8021Q
 	help
 	  Say Y or M if you want to enable support for tagging frames with a

From 63c75c053b4160f7b90a418dcc4e5bcfac2fb6fc Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Fri, 26 Feb 2021 14:32:26 +0800
Subject: [PATCH 027/164] net: dsa: mt7530: don't build GPIO support if
 !GPIOLIB

The new GPIO support may be optional at runtime, but it requires
building against gpiolib:

ERROR: modpost: "gpiochip_get_data" [drivers/net/dsa/mt7530.ko]
undefined!
ERROR: modpost: "devm_gpiochip_add_data_with_key"
[drivers/net/dsa/mt7530.ko] undefined!

Add #ifdef to exclude GPIO support if GPIOLIB is not enabled.

Fixes: 429a0edeefd8 ("net: dsa: mt7530: MT7530 optional GPIO support")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Link: https://lore.kernel.org/r/20210226063226.8474-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mt7530.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index c17de2bcf2fe..f06f5fa2f898 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1624,6 +1624,7 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port,
 	}
 }
 
+#ifdef CONFIG_GPIOLIB
 static inline u32
 mt7530_gpio_to_bit(unsigned int offset)
 {
@@ -1726,6 +1727,7 @@ mt7530_setup_gpio(struct mt7530_priv *priv)
 
 	return devm_gpiochip_add_data(dev, gc, priv);
 }
+#endif /* CONFIG_GPIOLIB */
 
 static int
 mt7530_setup(struct dsa_switch *ds)
@@ -1868,11 +1870,13 @@ mt7530_setup(struct dsa_switch *ds)
 		}
 	}
 
+#ifdef CONFIG_GPIOLIB
 	if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) {
 		ret = mt7530_setup_gpio(priv);
 		if (ret)
 			return ret;
 	}
+#endif /* CONFIG_GPIOLIB */
 
 	mt7530_setup_port5(ds, interface);
 

From 7f654157f0aefba04cd7f6297351c87b76b47b89 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 25 Feb 2021 15:57:27 +0100
Subject: [PATCH 028/164] net: phy: make mdio_bus_phy_suspend/resume as
 __maybe_unused

When CONFIG_PM_SLEEP is disabled, the compiler warns about unused
functions:

drivers/net/phy/phy_device.c:273:12: error: unused function 'mdio_bus_phy_suspend' [-Werror,-Wunused-function]
static int mdio_bus_phy_suspend(struct device *dev)
drivers/net/phy/phy_device.c:293:12: error: unused function 'mdio_bus_phy_resume' [-Werror,-Wunused-function]
static int mdio_bus_phy_resume(struct device *dev)

The logic is intentional, so just mark these two as __maybe_unused
and remove the incorrect #ifdef.

Fixes: 4c0d2e96ba05 ("net: phy: consider that suspend2ram may cut off PHY power")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20210225145748.404410-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy_device.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ce495473cd5d..cc38e326405a 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -230,7 +230,6 @@ static struct phy_driver genphy_driver;
 static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);
 
-#ifdef CONFIG_PM
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
 	struct device_driver *drv = phydev->mdio.dev.driver;
@@ -270,7 +269,7 @@ out:
 	return !phydev->suspended;
 }
 
-static int mdio_bus_phy_suspend(struct device *dev)
+static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
 {
 	struct phy_device *phydev = to_phy_device(dev);
 
@@ -290,7 +289,7 @@ static int mdio_bus_phy_suspend(struct device *dev)
 	return phy_suspend(phydev);
 }
 
-static int mdio_bus_phy_resume(struct device *dev)
+static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
 {
 	struct phy_device *phydev = to_phy_device(dev);
 	int ret;
@@ -316,7 +315,6 @@ no_resume:
 
 static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend,
 			 mdio_bus_phy_resume);
-#endif /* CONFIG_PM */
 
 /**
  * phy_register_fixup - creates a new phy_fixup and adds it to the list

From 6a4d7234ae9a3bb31181f348ade9bbdb55aeb5c5 Mon Sep 17 00:00:00 2001
From: Heiko Thiery <heiko.thiery@gmail.com>
Date: Thu, 25 Feb 2021 22:15:16 +0100
Subject: [PATCH 029/164] net: fec: ptp: avoid register access when ipg clock
 is disabled

When accessing the timecounter register on an i.MX8MQ the kernel hangs.
This is only the case when the interface is down. This can be reproduced
by reading with 'phc_ctrl eth0 get'.

Like described in the change in 91c0d987a9788dcc5fe26baafd73bf9242b68900
the igp clock is disabled when the interface is down and leads to a
system hang.

So we check if the ptp clock status before reading the timecounter
register.

Signed-off-by: Heiko Thiery <heiko.thiery@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20210225211514.9115-1-heiko.thiery@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_ptp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 2e344aada4c6..1753807cbf97 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -377,9 +377,16 @@ static int fec_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	u64 ns;
 	unsigned long flags;
 
+	mutex_lock(&adapter->ptp_clk_mutex);
+	/* Check the ptp clock */
+	if (!adapter->ptp_clk_on) {
+		mutex_unlock(&adapter->ptp_clk_mutex);
+		return -EINVAL;
+	}
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
 	ns = timecounter_read(&adapter->tc);
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+	mutex_unlock(&adapter->ptp_clk_mutex);
 
 	*ts = ns_to_timespec64(ns);
 

From 2107d45f17bedd7dbf4178462da0ac223835a2a7 Mon Sep 17 00:00:00 2001
From: Arjun Roy <arjunroy@google.com>
Date: Thu, 25 Feb 2021 15:26:28 -0800
Subject: [PATCH 030/164] tcp: Fix sign comparison bug in
 getsockopt(TCP_ZEROCOPY_RECEIVE)

getsockopt(TCP_ZEROCOPY_RECEIVE) has a bug where we read a
user-provided "len" field of type signed int, and then compare the
value to the result of an "offsetofend" operation, which is unsigned.

Negative values provided by the user will be promoted to large
positive numbers; thus checking that len < offsetofend() will return
false when the intention was that it return true.

Note that while len is originally checked for negative values earlier
on in do_tcp_getsockopt(), subsequent calls to get_user() re-read the
value from userspace which may have changed in the meantime.

Therefore, re-add the check for negative values after the call to
get_user in the handler code for TCP_ZEROCOPY_RECEIVE.

Fixes: c8856c051454 ("tcp-zerocopy: Return inq along with tcp receive zerocopy.")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Arjun Roy <arjunroy@google.com>
Link: https://lore.kernel.org/r/20210225232628.4033281-1-arjunroy.kdev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a3422e42784e..dfb6f286c1de 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4143,7 +4143,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 
 		if (get_user(len, optlen))
 			return -EFAULT;
-		if (len < offsetofend(struct tcp_zerocopy_receive, length))
+		if (len < 0 ||
+		    len < offsetofend(struct tcp_zerocopy_receive, length))
 			return -EINVAL;
 		if (unlikely(len > sizeof(zc))) {
 			err = check_zeroed_user(optval + sizeof(zc),

From edcbf5137f093b5502f5f6b97cce3cbadbde27aa Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 25 Feb 2021 18:57:19 +0200
Subject: [PATCH 031/164] selftests: forwarding: Fix race condition in mirror
 installation

When mirroring to a gretap in hardware the device expects to be
programmed with the egress port and all the encapsulating headers. This
requires the driver to resolve the path the packet will take in the
software data path and program the device accordingly.

If the path cannot be resolved (in this case because of an unresolved
neighbor), then mirror installation fails until the path is resolved.
This results in a race that causes the test to sometimes fail.

Fix this by setting the neighbor's state to permanent, so that it is
always valid.

Fixes: b5b029399fa6d ("selftests: forwarding: mirror_gre_bridge_1d_vlan: Add STP test")
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/forwarding/mirror_gre_bridge_1d_vlan.sh          | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index 197e769c2ed1..f8cda822c1ce 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -86,11 +86,20 @@ test_ip6gretap()
 
 test_gretap_stp()
 {
+	# Sometimes after mirror installation, the neighbor's state is not valid.
+	# The reason is that there is no SW datapath activity related to the
+	# neighbor for the remote GRE address. Therefore whether the corresponding
+	# neighbor will be valid is a matter of luck, and the test is thus racy.
+	# Set the neighbor's state to permanent, so it would be always valid.
+	ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+		nud permanent dev br2
 	full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
 }
 
 test_ip6gretap_stp()
 {
+	ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+		nud permanent dev br2
 	full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
 }
 

From ae9b24ddb69b4e31cda1b5e267a5a08a1db11717 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 25 Feb 2021 18:57:20 +0200
Subject: [PATCH 032/164] mlxsw: spectrum_ethtool: Add an external speed to
 PTYS register

Currently, only external bits are added to the PTYS register, whereas
there is one external bit that is wrongly marked as internal, and so was
recently removed from the register.

Add that bit to the PTYS register again, as this bit is no longer
internal.

Its removal resulted in '100000baseLR4_ER4/Full' link mode no longer
being supported, causing a regression on some setups.

Fixes: 5bf01b571cf4 ("mlxsw: spectrum_ethtool: Remove internal speeds from PTYS register")
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reported-by: Eddie Shklaer <eddies@nvidia.com>
Tested-by: Eddie Shklaer <eddies@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h              | 1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 5 +++++
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c         | 3 ++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 16e2df6ef2f4..c4adc7f740d3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4430,6 +4430,7 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4		BIT(20)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4		BIT(21)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4		BIT(22)
+#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4	BIT(23)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR		BIT(27)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR		BIT(28)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR		BIT(29)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index bd7f873f6290..0bd64169bf81 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -1169,6 +1169,11 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
 		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
 		.speed		= SPEED_100000,
 	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+		.speed		= SPEED_100000,
+	},
 };
 
 #define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 40e2e79d4517..131b2a53d261 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -613,7 +613,8 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
 				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4,
+				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
+				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
 		.speed		= 100000,
 	},
 };

From dc860b88ce0a7ed9a048d5042cbb175daf60b657 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 25 Feb 2021 18:57:21 +0200
Subject: [PATCH 033/164] mlxsw: spectrum_router: Ignore routes using a deleted
 nexthop object

Routes are currently processed from a workqueue whereas nexthop objects
are processed in system call context. This can result in the driver not
finding a suitable nexthop group for a route and issuing a warning [1].

Fix this by ignoring such routes earlier in the process. The subsequent
deletion notification will be ignored as well.

[1]
 WARNING: CPU: 2 PID: 7754 at drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:4853 mlxsw_sp_router_fib_event_work+0x1112/0x1e00 [mlxsw_spectrum]
 [...]
 CPU: 2 PID: 7754 Comm: kworker/u8:0 Not tainted 5.11.0-rc6-cq-20210207-1 #16
 Hardware name: Mellanox Technologies Ltd. MSN2100/SA001390, BIOS 5.6.5 05/24/2018
 Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib_event_work [mlxsw_spectrum]
 RIP: 0010:mlxsw_sp_router_fib_event_work+0x1112/0x1e00 [mlxsw_spectrum]

Fixes: cdd6cfc54c64 ("mlxsw: spectrum_router: Allow programming routes with nexthop objects")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reported-by: Alex Veber <alexve@nvidia.com>
Tested-by: Alex Veber <alexve@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 9ce90841f92d..eda99d82766a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5951,6 +5951,10 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
 	if (mlxsw_sp->router->aborted)
 		return 0;
 
+	if (fen_info->fi->nh &&
+	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
+		return 0;
+
 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
 					 &fen_info->dst, sizeof(fen_info->dst),
 					 fen_info->dst_len,
@@ -6601,6 +6605,9 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
 		return 0;
 
+	if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
+		return 0;
+
 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
 					 &rt->fib6_dst.addr,
 					 sizeof(rt->fib6_dst.addr),

From d20cd745218cde1b268bef5282095ec6c95a3ea2 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Fri, 26 Feb 2021 04:43:09 -0500
Subject: [PATCH 034/164] bnxt_en: Fix race between firmware reset and driver
 remove.

The driver's error recovery reset sequence can take many seconds to
complete and only the critical sections are protected by rtnl_lock.
A recent change has introduced a regression in this sequence.

bnxt_remove_one() may be called while the recovery is in progress.
Normally, unregister_netdev() would cause bnxt_close_nic() to be
called and this would cause the error recovery to safely abort
with the BNXT_STATE_ABORT_ERR flag set in bnxt_close_nic().

Recently, we added bnxt_reinit_after_abort() to allow the user to
reopen the device after an aborted recovery.  This causes the
regression in the scenario described above because we would
attempt to re-open even after the netdev has been unregistered.

Fix it by checking the netdev reg_state in
bnxt_reinit_after_abort() and abort if it is unregistered.

Fixes: 6882c36cf82e ("bnxt_en: attempt to reinitialize after aborted reset")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a680fd9c68ea..c55189c7bb36 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9890,6 +9890,9 @@ static int bnxt_reinit_after_abort(struct bnxt *bp)
 	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
 		return -EBUSY;
 
+	if (bp->dev->reg_state == NETREG_UNREGISTERED)
+		return -ENODEV;
+
 	rc = bnxt_fw_init_one(bp);
 	if (!rc) {
 		bnxt_clear_int_mode(bp);

From 20d7d1c5c9b11e9f538ed4a2289be106de970d3e Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Fri, 26 Feb 2021 04:43:10 -0500
Subject: [PATCH 035/164] bnxt_en: reliably allocate IRQ table on reset to
 avoid crash

The following trace excerpt corresponds with a NULL pointer dereference
of 'bp->irq_tbl' in bnxt_setup_inta() on an Aarch64 system after many
device resets:

    Unable to handle kernel NULL pointer dereference at ... 000000d
    ...
    pc : string+0x3c/0x80
    lr : vsnprintf+0x294/0x7e0
    sp : ffff00000f61ba70 pstate : 20000145
    x29: ffff00000f61ba70 x28: 000000000000000d
    x27: ffff0000009c8b5a x26: ffff00000f61bb80
    x25: ffff0000009c8b5a x24: 0000000000000012
    x23: 00000000ffffffe0 x22: ffff000008990428
    x21: ffff00000f61bb80 x20: 000000000000000d
    x19: 000000000000001f x18: 0000000000000000
    x17: 0000000000000000 x16: ffff800b6d0fb400
    x15: 0000000000000000 x14: ffff800b7fe31ae8
    x13: 00001ed16472c920 x12: ffff000008c6b1c9
    x11: ffff000008cf0580 x10: ffff00000f61bb80
    x9 : 00000000ffffffd8 x8 : 000000000000000c
    x7 : ffff800b684b8000 x6 : 0000000000000000
    x5 : 0000000000000065 x4 : 0000000000000001
    x3 : ffff0a00ffffff04 x2 : 000000000000001f
    x1 : 0000000000000000 x0 : 000000000000000d
    Call trace:
    string+0x3c/0x80
    vsnprintf+0x294/0x7e0
    snprintf+0x44/0x50
    __bnxt_open_nic+0x34c/0x928 [bnxt_en]
    bnxt_open+0xe8/0x238 [bnxt_en]
    __dev_open+0xbc/0x130
    __dev_change_flags+0x12c/0x168
    dev_change_flags+0x20/0x60
    ...

Ordinarily, a call to bnxt_setup_inta() (not in trace due to inlining)
would not be expected on a system supporting MSIX at all. However, if
bnxt_init_int_mode() does not end up being called after the call to
bnxt_clear_int_mode() in bnxt_fw_reset_close(), then the driver will
think that only INTA is supported and bp->irq_tbl will be NULL,
causing the above crash.

In the error recovery scenario, we call bnxt_clear_int_mode() in
bnxt_fw_reset_close() early in the sequence. Ordinarily, we will
call bnxt_init_int_mode() in bnxt_hwrm_if_change() after we
reestablish communication with the firmware after reset.  However,
if the sequence has to abort before we call bnxt_init_int_mode() and
if the user later attempts to re-open the device, then it will cause
the crash above.

We fix it in 2 ways:

1. Check for bp->irq_tbl in bnxt_setup_int_mode(). If it is NULL, call
bnxt_init_init_mode().

2. If we need to abort in bnxt_hwrm_if_change() and cannot complete
the error recovery sequence, set the BNXT_STATE_ABORT_ERR flag.  This
will cause more drastic recovery at the next attempt to re-open the
device, including a call to bnxt_init_int_mode().

Fixes: 3bc7d4a352ef ("bnxt_en: Add BNXT_STATE_IN_FW_RESET state.")
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c55189c7bb36..b53a0d87371a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8556,10 +8556,18 @@ static void bnxt_setup_inta(struct bnxt *bp)
 	bp->irq_tbl[0].handler = bnxt_inta;
 }
 
+static int bnxt_init_int_mode(struct bnxt *bp);
+
 static int bnxt_setup_int_mode(struct bnxt *bp)
 {
 	int rc;
 
+	if (!bp->irq_tbl) {
+		rc = bnxt_init_int_mode(bp);
+		if (rc || !bp->irq_tbl)
+			return rc ?: -ENODEV;
+	}
+
 	if (bp->flags & BNXT_FLAG_USING_MSIX)
 		bnxt_setup_msix(bp);
 	else
@@ -8744,7 +8752,7 @@ static int bnxt_init_inta(struct bnxt *bp)
 
 static int bnxt_init_int_mode(struct bnxt *bp)
 {
-	int rc = 0;
+	int rc = -ENODEV;
 
 	if (bp->flags & BNXT_FLAG_MSIX_CAP)
 		rc = bnxt_init_msix(bp);
@@ -9514,7 +9522,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 {
 	struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_func_drv_if_change_input req = {0};
-	bool resc_reinit = false, fw_reset = false;
+	bool fw_reset = !bp->irq_tbl;
+	bool resc_reinit = false;
 	int rc, retry = 0;
 	u32 flags = 0;
 
@@ -9557,6 +9566,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 
 	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) {
 		netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
+		set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
 		return -ENODEV;
 	}
 	if (resc_reinit || fw_reset) {

From c33cb0020ee6dd96cc9976d6085a7d8422f6dbed Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Mon, 22 Feb 2021 08:00:00 +0000
Subject: [PATCH 036/164] uapi: nfnetlink_cthelper.h: fix userspace compilation
 error

Apparently, <linux/netfilter/nfnetlink_cthelper.h> and
<linux/netfilter/nfnetlink_acct.h> could not be included into the same
compilation unit because of a cut-and-paste typo in the former header.

Fixes: 12f7a505331e6 ("netfilter: add user-space connection tracking helper infrastructure")
Cc: <stable@vger.kernel.org> # v3.6
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_cthelper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/netfilter/nfnetlink_cthelper.h b/include/uapi/linux/netfilter/nfnetlink_cthelper.h
index a13137afc429..70af02092d16 100644
--- a/include/uapi/linux/netfilter/nfnetlink_cthelper.h
+++ b/include/uapi/linux/netfilter/nfnetlink_cthelper.h
@@ -5,7 +5,7 @@
 #define NFCT_HELPER_STATUS_DISABLED	0
 #define NFCT_HELPER_STATUS_ENABLED	1
 
-enum nfnl_acct_msg_types {
+enum nfnl_cthelper_msg_types {
 	NFNL_MSG_CTHELPER_NEW,
 	NFNL_MSG_CTHELPER_GET,
 	NFNL_MSG_CTHELPER_DEL,

From c57ea2d7d81fbaa72c7d0ffbff61ade1039f4a0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Klemen=20Ko=C5=A1ir?= <klemen.kosir@kream.io>
Date: Sat, 20 Feb 2021 18:29:26 +0900
Subject: [PATCH 037/164] netfilter: conntrack: Remove a double space in a log
 message
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removed an extra space in a log message and an extra blank line in code.

Signed-off-by: Klemen Košir <klemen.kosir@kream.io>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_helper.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 118f415928ae..b055187235f8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -219,7 +219,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
 			return NULL;
 		pr_info("nf_conntrack: default automatic helper assignment "
 			"has been turned off for security reasons and CT-based "
-			" firewall rule not found. Use the iptables CT target "
+			"firewall rule not found. Use the iptables CT target "
 			"to attach helpers instead.\n");
 		net->ct.auto_assign_helper_warned = 1;
 		return NULL;
@@ -228,7 +228,6 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
 	return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 }
 
-
 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 			      gfp_t flags)
 {

From 03a3ca37e4c6478e3a84f04c8429dd5889e107fd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 24 Feb 2021 17:23:19 +0100
Subject: [PATCH 038/164] netfilter: nf_nat: undo erroneous tcp edemux lookup

Under extremely rare conditions TCP early demux will retrieve the wrong
socket.

1. local machine establishes a connection to a remote server, S, on port
   p.

   This gives:
   laddr:lport -> S:p
   ... both in tcp and conntrack.

2. local machine establishes a connection to host H, on port p2.
   2a. TCP stack choses same laddr:lport, so we have
   laddr:lport -> H:p2 from TCP point of view.
   2b). There is a destination NAT rewrite in place, translating
        H:p2 to S:p.  This results in following conntrack entries:

   I)  laddr:lport -> S:p  (origin)  S:p -> laddr:lport (reply)
   II) laddr:lport -> H:p2 (origin)  S:p -> laddr:lport2 (reply)

   NAT engine has rewritten laddr:lport to laddr:lport2 to map
   the reply packet to the correct origin.

   When server sends SYN/ACK to laddr:lport2, the PREROUTING hook
   will undo-the SNAT transformation, rewriting IP header to
   S:p -> laddr:lport

   This causes TCP early demux to associate the skb with the TCP socket
   of the first connection.

   The INPUT hook will then reverse the DNAT transformation, rewriting
   the IP header to H:p2 -> laddr:lport.

Because packet ends up with the wrong socket, the new connection
never completes: originator stays in SYN_SENT and conntrack entry
remains in SYN_RECV until timeout, and responder retransmits SYN/ACK
until it gives up.

To resolve this, orphan the skb after the input rewrite:
Because the source IP address changed, the socket must be incorrect.
We can't move the DNAT undo to prerouting due to backwards
compatibility, doing so will make iptables/nftables rules to no longer
match the way they did.

After orphan, the packet will be handed to the next protocol layer
(tcp, udp, ...) and that will repeat the socket lookup just like as if
early demux was disabled.

Fixes: 41063e9dd1195 ("ipv4: Early TCP socket demux.")
Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1427
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_proto.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index e87b6bd6b3cd..4731d21fc3ad 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -646,8 +646,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 }
 
 static unsigned int
-nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
-	       const struct nf_hook_state *state)
+nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
+			const struct nf_hook_state *state)
 {
 	unsigned int ret;
 	__be32 daddr = ip_hdr(skb)->daddr;
@@ -659,6 +659,23 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
 	return ret;
 }
 
+static unsigned int
+nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
+		     const struct nf_hook_state *state)
+{
+	__be32 saddr = ip_hdr(skb)->saddr;
+	struct sock *sk = skb->sk;
+	unsigned int ret;
+
+	ret = nf_nat_ipv4_fn(priv, skb, state);
+
+	if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
+	    !inet_sk_transparent(sk))
+		skb_orphan(skb); /* TCP edemux obtained wrong socket */
+
+	return ret;
+}
+
 static unsigned int
 nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
 		const struct nf_hook_state *state)
@@ -736,7 +753,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	/* Before packet filtering, change destination */
 	{
-		.hook		= nf_nat_ipv4_in,
+		.hook		= nf_nat_ipv4_pre_routing,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_NAT_DST,
@@ -757,7 +774,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	},
 	/* After packet filtering, change source */
 	{
-		.hook		= nf_nat_ipv4_fn,
+		.hook		= nf_nat_ipv4_local_in,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_NAT_SRC,

From 07b5a76e18925a595bfef44531dbf2f397bb5507 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 24 Feb 2021 17:23:20 +0100
Subject: [PATCH 039/164] netfilter: conntrack: avoid misleading 'invalid' in
 log message

The packet is not flagged as invalid: conntrack will accept it and
its associated with the conntrack entry.

This happens e.g. when receiving a retransmitted SYN in SYN_RECV state.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1d7e1c595546..ec23330687a5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -982,8 +982,10 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 					IP_CT_EXP_CHALLENGE_ACK;
 		}
 		spin_unlock_bh(&ct->lock);
-		nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
-					  "state %s ", tcp_conntrack_names[old_state]);
+		nf_ct_l4proto_log_invalid(skb, ct,
+					  "packet (index %d) in dir %d ignored, state %s",
+					  index, dir,
+					  tcp_conntrack_names[old_state]);
 		return NF_ACCEPT;
 	case TCP_CONNTRACK_MAX:
 		/* Special case for SYN proxy: when the SYN to the server or

From c2c16ccba2f55d527dd145a5d8c038694b3b343f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 24 Feb 2021 17:23:21 +0100
Subject: [PATCH 040/164] selftests: netfilter: test nat port clash resolution
 interaction with tcp early demux

Convert Antonio Ojeas bug reproducer to a kselftest.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/Makefile    |  2 +-
 .../selftests/netfilter/nf_nat_edemux.sh      | 99 +++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/nf_nat_edemux.sh

diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3006a8e5b41a..3171069a6b46 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -4,7 +4,7 @@
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
-	nft_queue.sh nft_meta.sh \
+	nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
 	ipip-conntrack-mtu.sh
 
 LDLIBS = -lmnl
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..cfee3b65be0f
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+cleanup()
+{
+	ip netns del $ns1
+	ip netns del $ns2
+}
+
+iperf3 -v > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iperf3"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $ns1"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns $ns1 dev veth1
+ip link set netns $ns2 dev veth2
+
+ip netns exec $ns1 ip link set up dev lo
+ip netns exec $ns1 ip link set up dev veth1
+ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec $ns2 ip link set up dev lo
+ip netns exec $ns2 ip link set up dev veth2
+ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
+iperfs=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
+
+sleep 1
+
+# add a persistent connection from the other namespace
+ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null
+ret=$?
+
+kill $iperfs
+
+# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+	echo "PASS: nc can connect via NAT'd address"
+else
+	echo "FAIL: nc cannot connect via NAT'd address"
+	exit 1
+fi
+
+exit 0

From 8e24edddad152b998b37a7f583175137ed2e04a5 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 27 Feb 2021 11:27:45 +0300
Subject: [PATCH 041/164] netfilter: x_tables: gpf inside xt_find_revision()

nested target/match_revfn() calls work with xt[NFPROTO_UNSPEC] lists
without taking xt[NFPROTO_UNSPEC].mutex. This can race with module unload
and cause host to crash:

general protection fault: 0000 [#1]
Modules linked in: ... [last unloaded: xt_cluster]
CPU: 0 PID: 542455 Comm: iptables
RIP: 0010:[<ffffffff8ffbd518>]  [<ffffffff8ffbd518>] strcmp+0x18/0x40
RDX: 0000000000000003 RSI: ffff9a5a5d9abe10 RDI: dead000000000111
R13: ffff9a5a5d9abe10 R14: ffff9a5a5d9abd8c R15: dead000000000100
(VvS: %R15 -- &xt_match,  %RDI -- &xt_match.name,
xt_cluster unregister match in xt[NFPROTO_UNSPEC].match list)
Call Trace:
 [<ffffffff902ccf44>] match_revfn+0x54/0xc0
 [<ffffffff902ccf9f>] match_revfn+0xaf/0xc0
 [<ffffffff902cd01e>] xt_find_revision+0x6e/0xf0
 [<ffffffffc05a5be0>] do_ipt_get_ctl+0x100/0x420 [ip_tables]
 [<ffffffff902cc6bf>] nf_getsockopt+0x4f/0x70
 [<ffffffff902dd99e>] ip_getsockopt+0xde/0x100
 [<ffffffff903039b5>] raw_getsockopt+0x25/0x50
 [<ffffffff9026c5da>] sock_common_getsockopt+0x1a/0x20
 [<ffffffff9026b89d>] SyS_getsockopt+0x7d/0xf0
 [<ffffffff903cbf92>] system_call_fastpath+0x25/0x2a

Fixes: 656caff20e1 ("netfilter 04/09: x_tables: fix match/target revision lookup")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index acce622582e3..bce6ca203d46 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -330,6 +330,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
 	const struct xt_match *m;
 	int have_rev = 0;
 
+	mutex_lock(&xt[af].mutex);
 	list_for_each_entry(m, &xt[af].match, list) {
 		if (strcmp(m->name, name) == 0) {
 			if (m->revision > *bestp)
@@ -338,6 +339,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
 				have_rev = 1;
 		}
 	}
+	mutex_unlock(&xt[af].mutex);
 
 	if (af != NFPROTO_UNSPEC && !have_rev)
 		return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -350,6 +352,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
 	const struct xt_target *t;
 	int have_rev = 0;
 
+	mutex_lock(&xt[af].mutex);
 	list_for_each_entry(t, &xt[af].target, list) {
 		if (strcmp(t->name, name) == 0) {
 			if (t->revision > *bestp)
@@ -358,6 +361,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
 				have_rev = 1;
 		}
 	}
+	mutex_unlock(&xt[af].mutex);
 
 	if (af != NFPROTO_UNSPEC && !have_rev)
 		return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -371,12 +375,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 {
 	int have_rev, best = -1;
 
-	mutex_lock(&xt[af].mutex);
 	if (target == 1)
 		have_rev = target_revfn(af, name, revision, &best);
 	else
 		have_rev = match_revfn(af, name, revision, &best);
-	mutex_unlock(&xt[af].mutex);
 
 	/* Nothing at all?  Return 0 to try loading module. */
 	if (best == -1) {

From a4fc088ad4ff4a99d01978aa41065132b574b4b2 Mon Sep 17 00:00:00 2001
From: Yinjun Zhang <yinjun.zhang@corigine.com>
Date: Thu, 25 Feb 2021 13:51:02 +0100
Subject: [PATCH 042/164] ethtool: fix the check logic of at least one channel
 for RX/TX

The command "ethtool -L <intf> combined 0" may clean the RX/TX channel
count and skip the error path, since the attrs
tb[ETHTOOL_A_CHANNELS_RX_COUNT] and tb[ETHTOOL_A_CHANNELS_TX_COUNT]
are NULL in this case when recent ethtool is used.

Tested using ethtool v5.10.

Fixes: 7be92514b99c ("ethtool: check if there is at least one channel for TX/RX in the core")
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Link: https://lore.kernel.org/r/20210225125102.23989-1-simon.horman@netronome.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/channels.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 25a9e566ef5c..6a070dc8e4b0 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -116,10 +116,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
 	struct ethtool_channels channels = {};
 	struct ethnl_req_info req_info = {};
 	struct nlattr **tb = info->attrs;
-	const struct nlattr *err_attr;
+	u32 err_attr, max_rx_in_use = 0;
 	const struct ethtool_ops *ops;
 	struct net_device *dev;
-	u32 max_rx_in_use = 0;
 	int ret;
 
 	ret = ethnl_parse_header_dev_get(&req_info,
@@ -157,34 +156,35 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
 
 	/* ensure new channel counts are within limits */
 	if (channels.rx_count > channels.max_rx)
-		err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+		err_attr = ETHTOOL_A_CHANNELS_RX_COUNT;
 	else if (channels.tx_count > channels.max_tx)
-		err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+		err_attr = ETHTOOL_A_CHANNELS_TX_COUNT;
 	else if (channels.other_count > channels.max_other)
-		err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT];
+		err_attr = ETHTOOL_A_CHANNELS_OTHER_COUNT;
 	else if (channels.combined_count > channels.max_combined)
-		err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+		err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
 	else
-		err_attr = NULL;
+		err_attr = 0;
 	if (err_attr) {
 		ret = -EINVAL;
-		NL_SET_ERR_MSG_ATTR(info->extack, err_attr,
+		NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
 				    "requested channel count exceeds maximum");
 		goto out_ops;
 	}
 
 	/* ensure there is at least one RX and one TX channel */
 	if (!channels.combined_count && !channels.rx_count)
-		err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+		err_attr = ETHTOOL_A_CHANNELS_RX_COUNT;
 	else if (!channels.combined_count && !channels.tx_count)
-		err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+		err_attr = ETHTOOL_A_CHANNELS_TX_COUNT;
 	else
-		err_attr = NULL;
+		err_attr = 0;
 	if (err_attr) {
 		if (mod_combined)
-			err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+			err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
 		ret = -EINVAL;
-		NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured");
+		NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
+				    "requested channel counts would result in no RX or TX channel being configured");
 		goto out_ops;
 	}
 

From d313d16bbaea0f11a2e98f04a6c678b43c208915 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 26 Feb 2021 14:20:38 +0100
Subject: [PATCH 043/164] net: broadcom: bcm4908_enet: enable RX after
 processing packets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When receiving a lot of packets hardware may run out of free
descriptiors and stop RX ring. Enable it every time after handling
received packets.

Fixes: 4feffeadbcb2 ("net: broadcom: bcm4908enet: add BCM4908 controller driver")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20210226132038.29849-1-zajec5@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bcm4908_enet.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 0b70e9e0ddad..98cf82dea3e4 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -592,6 +592,9 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 		bcm4908_enet_intrs_on(enet);
 	}
 
+	/* Hardware could disable ring if it run out of descriptors */
+	bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
+
 	return handled;
 }
 

From 89e5c58fc1e2857ccdaae506fb8bc5fed57ee063 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 26 Feb 2021 22:22:48 +0100
Subject: [PATCH 044/164] net: Fix gro aggregation for udp encaps with zero
 csum

We noticed a GRO issue for UDP-based encaps such as vxlan/geneve when the
csum for the UDP header itself is 0. In that case, GRO aggregation does
not take place on the phys dev, but instead is deferred to the vxlan/geneve
driver (see trace below).

The reason is essentially that GRO aggregation bails out in udp_gro_receive()
for such case when drivers marked the skb with CHECKSUM_UNNECESSARY (ice, i40e,
others) where for non-zero csums 2abb7cdc0dc8 ("udp: Add support for doing
checksum unnecessary conversion") promotes those skbs to CHECKSUM_COMPLETE
and napi context has csum_valid set. This is however not the case for zero
UDP csum (here: csum_cnt is still 0 and csum_valid continues to be false).

At the same time 57c67ff4bd92 ("udp: additional GRO support") added matches
on !uh->check ^ !uh2->check as part to determine candidates for aggregation,
so it certainly is expected to handle zero csums in udp_gro_receive(). The
purpose of the check added via 662880f44203 ("net: Allow GRO to use and set
levels of checksum unnecessary") seems to catch bad csum and stop aggregation
right away.

One way to fix aggregation in the zero case is to only perform the !csum_valid
check in udp_gro_receive() if uh->check is infact non-zero.

Before:

  [...]
  swapper     0 [008]   731.946506: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100400 len=1500   (1)
  swapper     0 [008]   731.946507: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100200 len=1500
  swapper     0 [008]   731.946507: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101100 len=1500
  swapper     0 [008]   731.946508: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101700 len=1500
  swapper     0 [008]   731.946508: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101b00 len=1500
  swapper     0 [008]   731.946508: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100600 len=1500
  swapper     0 [008]   731.946508: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100f00 len=1500
  swapper     0 [008]   731.946509: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100a00 len=1500
  swapper     0 [008]   731.946516: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100500 len=1500
  swapper     0 [008]   731.946516: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100700 len=1500
  swapper     0 [008]   731.946516: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101d00 len=1500   (2)
  swapper     0 [008]   731.946517: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101000 len=1500
  swapper     0 [008]   731.946517: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101c00 len=1500
  swapper     0 [008]   731.946517: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101400 len=1500
  swapper     0 [008]   731.946518: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100e00 len=1500
  swapper     0 [008]   731.946518: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497101600 len=1500
  swapper     0 [008]   731.946521: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff966497100800 len=774
  swapper     0 [008]   731.946530: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff966497100400 len=14032 (1)
  swapper     0 [008]   731.946530: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff966497101d00 len=9112  (2)
  [...]

  # netperf -H 10.55.10.4 -t TCP_STREAM -l 20
  MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.55.10.4 () port 0 AF_INET : demo
  Recv   Send    Send
  Socket Socket  Message  Elapsed
  Size   Size    Size     Time     Throughput
  bytes  bytes   bytes    secs.    10^6bits/sec

   87380  16384  16384    20.01    13129.24

After:

  [...]
  swapper     0 [026]   521.862641: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff93ab0d479000 len=11286 (1)
  swapper     0 [026]   521.862643: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff93ab0d479000 len=11236 (1)
  swapper     0 [026]   521.862650: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff93ab0d478500 len=2898  (2)
  swapper     0 [026]   521.862650: net:netif_receive_skb: dev=enp10s0f0  skbaddr=0xffff93ab0d479f00 len=8490  (3)
  swapper     0 [026]   521.862653: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff93ab0d478500 len=2848  (2)
  swapper     0 [026]   521.862653: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff93ab0d479f00 len=8440  (3)
  [...]

  # netperf -H 10.55.10.4 -t TCP_STREAM -l 20
  MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.55.10.4 () port 0 AF_INET : demo
  Recv   Send    Send
  Socket Socket  Message  Elapsed
  Size   Size    Size     Time     Throughput
  bytes  bytes   bytes    secs.    10^6bits/sec

   87380  16384  16384    20.01    24576.53

Fixes: 57c67ff4bd92 ("udp: additional GRO support")
Fixes: 662880f44203 ("net: Allow GRO to use and set levels of checksum unnecessary")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Tom Herbert <tom@herbertland.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20210226212248.8300-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/udp_offload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b76c48efd37e..c5b4b586570f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -526,7 +526,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	}
 
 	if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
-	    (skb->ip_summed != CHECKSUM_PARTIAL &&
+	    (uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 	     !NAPI_GRO_CB(skb)->csum_valid) ||
 	    !udp_sk(sk)->gro_receive)

From ae85ddda0f1b341b2d25f5a5e0eff1d42b6ef3df Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 27 Feb 2021 15:24:51 +0800
Subject: [PATCH 045/164] net: hns3: fix error mask definition of flow director

Currently, some bit filed definitions of flow director TCAM
configuration command are incorrect. Since the wrong MSB is
always 0, and these fields are assgined in order, so it still works.

Fix it by redefine them.

Fixes: 117328680288 ("net: hns3: Add input key and action config support for flow director")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index ff52a65b4cff..057dda735492 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -1053,16 +1053,16 @@ struct hclge_fd_tcam_config_3_cmd {
 #define HCLGE_FD_AD_DROP_B		0
 #define HCLGE_FD_AD_DIRECT_QID_B	1
 #define HCLGE_FD_AD_QID_S		2
-#define HCLGE_FD_AD_QID_M		GENMASK(12, 2)
+#define HCLGE_FD_AD_QID_M		GENMASK(11, 2)
 #define HCLGE_FD_AD_USE_COUNTER_B	12
 #define HCLGE_FD_AD_COUNTER_NUM_S	13
 #define HCLGE_FD_AD_COUNTER_NUM_M	GENMASK(20, 13)
 #define HCLGE_FD_AD_NXT_STEP_B		20
 #define HCLGE_FD_AD_NXT_KEY_S		21
-#define HCLGE_FD_AD_NXT_KEY_M		GENMASK(26, 21)
+#define HCLGE_FD_AD_NXT_KEY_M		GENMASK(25, 21)
 #define HCLGE_FD_AD_WR_RULE_ID_B	0
 #define HCLGE_FD_AD_RULE_ID_S		1
-#define HCLGE_FD_AD_RULE_ID_M		GENMASK(13, 1)
+#define HCLGE_FD_AD_RULE_ID_M		GENMASK(12, 1)
 #define HCLGE_FD_AD_TC_OVRD_B		16
 #define HCLGE_FD_AD_TC_SIZE_S		17
 #define HCLGE_FD_AD_TC_SIZE_M		GENMASK(20, 17)

From c75ec148a316e8cf52274d16b9b422703b96f5ce Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 27 Feb 2021 15:24:52 +0800
Subject: [PATCH 046/164] net: hns3: fix query vlan mask value error for flow
 director

Currently, the driver returns VLAN_VID_MASK for vlan mask field,
when get flow director rule information for rule doesn't use vlan.
It may cause the vlan mask value display as 0xf000 in this
case, like below:

estuary:/$ ethtool -u eth1
50 RX rings available
Total 1 rules

Filter: 2
Rule Type: TCP over IPv4
Src IP addr: 0.0.0.0 mask: 255.255.255.255
Dest IP addr: 0.0.0.0 mask: 255.255.255.255
TOS: 0x0 mask: 0xff
Src port: 0 mask: 0xffff
Dest port: 0 mask: 0xffff
VLAN EtherType: 0x0 mask: 0xffff
VLAN: 0x0 mask: 0xf000
User-defined: 0x1234 mask: 0x0
Action: Direct to queue 3

Fix it by return 0.

Fixes: 05c2314fe6a8 ("net: hns3: Add support for rule query of flow director")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 34b744df6709..932cfd1fb7e9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6330,8 +6330,7 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
 		fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1);
 		fs->m_ext.vlan_tci =
 				rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
-				cpu_to_be16(VLAN_VID_MASK) :
-				cpu_to_be16(rule->tuples_mask.vlan_tag1);
+				0 : cpu_to_be16(rule->tuples_mask.vlan_tag1);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {

From b36fc875bcdee56865c444a2cdae17d354a6d5f5 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 27 Feb 2021 15:24:53 +0800
Subject: [PATCH 047/164] net: hns3: fix bug when calculating the TCAM table
 info

The function hclge_fd_convert_tuple() is used to convert tuples
and tuples mask to TCAM x and y.  But it misuses the source mac
as source mac mask when convert INNER_SRC_MAC, which may cause
the flow director rule works unexpectedly. So fix it.

Fixes: 117328680288 ("net: hns3: Add input key and action config support for flow director")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 932cfd1fb7e9..e3f81c7e0ce7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5245,9 +5245,9 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
 	case BIT(INNER_SRC_MAC):
 		for (i = 0; i < ETH_ALEN; i++) {
 			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-			       rule->tuples.src_mac[i]);
+			       rule->tuples_mask.src_mac[i]);
 			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-			       rule->tuples.src_mac[i]);
+			       rule->tuples_mask.src_mac[i]);
 		}
 
 		return true;

From 449052cfebf624b670faa040245d3feed770d22f Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 18 Feb 2021 19:00:35 +0800
Subject: [PATCH 048/164] can: flexcan: assert FRZ bit in flexcan_chip_freeze()

Assert HALT bit to enter freeze mode, there is a premise that FRZ bit is
asserted. This patch asserts FRZ bit in flexcan_chip_freeze, although
the reset value is 1b'1. This is a prepare patch, later patch will
invoke flexcan_chip_freeze() to enter freeze mode, which polling freeze
mode acknowledge.

Fixes: b1aa1c7a2165b ("can: flexcan: fix transition from and to freeze mode in chip_{,un}freeze")
Link: https://lore.kernel.org/r/20210218110037.16591-2-qiangqing.zhang@nxp.com
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 971ada36e37f..ee2d4967d66a 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -701,7 +701,7 @@ static int flexcan_chip_freeze(struct flexcan_priv *priv)
 	u32 reg;
 
 	reg = priv->read(&regs->mcr);
-	reg |= FLEXCAN_MCR_HALT;
+	reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT;
 	priv->write(reg, &regs->mcr);
 
 	while (timeout-- && !(priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))

From ec15e27cc8904605846a354bb1f808ea1432f853 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 18 Feb 2021 19:00:36 +0800
Subject: [PATCH 049/164] can: flexcan: enable RX FIFO after FRZ/HALT valid

RX FIFO enable failed could happen when do system reboot stress test:

[    0.303958] flexcan 5a8d0000.can: 5a8d0000.can supply xceiver not found, using dummy regulator
[    0.304281] flexcan 5a8d0000.can (unnamed net_device) (uninitialized): Could not enable RX FIFO, unsupported core
[    0.314640] flexcan 5a8d0000.can: registering netdev failed
[    0.320728] flexcan 5a8e0000.can: 5a8e0000.can supply xceiver not found, using dummy regulator
[    0.320991] flexcan 5a8e0000.can (unnamed net_device) (uninitialized): Could not enable RX FIFO, unsupported core
[    0.331360] flexcan 5a8e0000.can: registering netdev failed
[    0.337444] flexcan 5a8f0000.can: 5a8f0000.can supply xceiver not found, using dummy regulator
[    0.337716] flexcan 5a8f0000.can (unnamed net_device) (uninitialized): Could not enable RX FIFO, unsupported core
[    0.348117] flexcan 5a8f0000.can: registering netdev failed

RX FIFO should be enabled after the FRZ/HALT are valid. But the current
code enable RX FIFO and FRZ/HALT at the same time.

Fixes: e955cead03117 ("CAN: Add Flexcan CAN controller driver")
Link: https://lore.kernel.org/r/20210218110037.16591-3-qiangqing.zhang@nxp.com
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index ee2d4967d66a..e66a51dbea0a 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1865,10 +1865,14 @@ static int register_flexcandev(struct net_device *dev)
 	if (err)
 		goto out_chip_disable;
 
-	/* set freeze, halt and activate FIFO, restrict register access */
+	/* set freeze, halt */
+	err = flexcan_chip_freeze(priv);
+	if (err)
+		goto out_chip_disable;
+
+	/* activate FIFO, restrict register access */
 	reg = priv->read(&regs->mcr);
-	reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT |
-		FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV;
+	reg |=  FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV;
 	priv->write(reg, &regs->mcr);
 
 	/* Currently we only support newer versions of this core

From c63820045e2000f05657467a08715c18c9f490d9 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 18 Feb 2021 19:00:37 +0800
Subject: [PATCH 050/164] can: flexcan: invoke flexcan_chip_freeze() to enter
 freeze mode

Invoke flexcan_chip_freeze() to enter freeze mode, since need poll
freeze mode acknowledge.

Fixes: e955cead03117 ("CAN: Add Flexcan CAN controller driver")
Link: https://lore.kernel.org/r/20210218110037.16591-4-qiangqing.zhang@nxp.com
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index e66a51dbea0a..134c05757a3b 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1480,10 +1480,13 @@ static int flexcan_chip_start(struct net_device *dev)
 
 	flexcan_set_bittiming(dev);
 
+	/* set freeze, halt */
+	err = flexcan_chip_freeze(priv);
+	if (err)
+		goto out_chip_disable;
+
 	/* MCR
 	 *
-	 * enable freeze
-	 * halt now
 	 * only supervisor access
 	 * enable warning int
 	 * enable individual RX masking
@@ -1492,9 +1495,8 @@ static int flexcan_chip_start(struct net_device *dev)
 	 */
 	reg_mcr = priv->read(&regs->mcr);
 	reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff);
-	reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV |
-		FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ | FLEXCAN_MCR_IDAM_C |
-		FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
+	reg_mcr |= FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ |
+		FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
 
 	/* MCR
 	 *

From 2afe72ead5ab672c8012bda83cbe65f8145568e0 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 22 Feb 2021 20:46:06 +0100
Subject: [PATCH 051/164] can: mcp251xfd: revert "can: mcp251xfd: add BQL
 support"

In the following 4 patches

| 99842c9685ab can: dev: can_rx_offload_get_echo_skb(): extend to return can frame length
| 9420e1d495e2 can: dev: can_get_echo_skb(): extend to return can frame length
| 1dcb6e57db83 can: dev: can_put_echo_skb(): extend to handle frame_len
| f0ef72febc9a can: dev: extend struct can_skb_priv to hold CAN frame length

the CAN echo SKB support was extended to hold the CAN frame
length (which is the length of the CAN frame on the wire). It is meant
as a helper for BQL support, to avoid the re-calculation of the frame
length before sending it and on TX-completion.

However if the CAN frame is send without the request to be looped back
the SKB is discarded in can_put_echo_skb() and the subsequent
can_get_echo_skb() and can_rx_offload_get_echo_skb() return 0 for the
CAN frame length. This results in BQL stalling the TX queue after a
few packages.

Until the BQL helpers can_get_echo_skb() and
can_rx_offload_get_echo_skb() are fixed, revert the BQL support for
the mcp251xfd driver.

This reverts commit 4162e18e949ba520d5116ac0323500355479a00e.

Fixes: 4162e18e949b ("can: mcp251xfd: add BQL support")
Cc: Manivannan Sadhasivam <mani@kernel.org>
Cc: Thomas Kopp <thomas.kopp@microchip.com>
Link: https://lore.kernel.org/r/20210228083347.28580-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .../net/can/spi/mcp251xfd/mcp251xfd-core.c    | 21 ++++---------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 3c5b92911d46..799e9d5d3481 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -335,8 +335,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 	u8 len;
 	int i, j;
 
-	netdev_reset_queue(priv->ndev);
-
 	/* TEF */
 	tef_ring = priv->tef;
 	tef_ring->head = 0;
@@ -1249,8 +1247,7 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
 
 static int
 mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
-			   const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
-			   unsigned int *frame_len_ptr)
+			   const struct mcp251xfd_hw_tef_obj *hw_tef_obj)
 {
 	struct net_device_stats *stats = &priv->ndev->stats;
 	u32 seq, seq_masked, tef_tail_masked;
@@ -1272,8 +1269,7 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
 	stats->tx_bytes +=
 		can_rx_offload_get_echo_skb(&priv->offload,
 					    mcp251xfd_get_tef_tail(priv),
-					    hw_tef_obj->ts,
-					    frame_len_ptr);
+					    hw_tef_obj->ts, NULL);
 	stats->tx_packets++;
 	priv->tef->tail++;
 
@@ -1331,7 +1327,6 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 {
 	struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX];
-	unsigned int total_frame_len = 0;
 	u8 tef_tail, len, l;
 	int err, i;
 
@@ -1353,9 +1348,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 	}
 
 	for (i = 0; i < len; i++) {
-		unsigned int frame_len;
-
-		err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
+		err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]);
 		/* -EAGAIN means the Sequence Number in the TEF
 		 * doesn't match our tef_tail. This can happen if we
 		 * read the TEF objects too early. Leave loop let the
@@ -1365,8 +1358,6 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 			goto out_netif_wake_queue;
 		if (err)
 			return err;
-
-		total_frame_len += frame_len;
 	}
 
  out_netif_wake_queue:
@@ -1397,7 +1388,6 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 			return err;
 
 		tx_ring->tail += len;
-		netdev_completed_queue(priv->ndev, len, total_frame_len);
 
 		err = mcp251xfd_check_tef_tail(priv);
 		if (err)
@@ -2443,7 +2433,6 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
 	struct mcp251xfd_priv *priv = netdev_priv(ndev);
 	struct mcp251xfd_tx_ring *tx_ring = priv->tx;
 	struct mcp251xfd_tx_obj *tx_obj;
-	unsigned int frame_len;
 	u8 tx_head;
 	int err;
 
@@ -2462,9 +2451,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
 	if (mcp251xfd_get_tx_free(tx_ring) == 0)
 		netif_stop_queue(ndev);
 
-	frame_len = can_skb_get_frame_len(skb);
-	can_put_echo_skb(skb, ndev, tx_head, frame_len);
-	netdev_sent_queue(priv->ndev, frame_len);
+	can_put_echo_skb(skb, ndev, tx_head, 0);
 
 	err = mcp251xfd_tx_obj_write(priv, tx_obj);
 	if (err)

From e940e0895a82c6fbaa259f2615eb52b57ee91a7e Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 26 Feb 2021 10:24:56 +0100
Subject: [PATCH 052/164] can: skb: can_skb_set_owner(): fix ref counting if
 socket was closed before setting skb ownership

There are two ref count variables controlling the free()ing of a socket:
- struct sock::sk_refcnt - which is changed by sock_hold()/sock_put()
- struct sock::sk_wmem_alloc - which accounts the memory allocated by
  the skbs in the send path.

In case there are still TX skbs on the fly and the socket() is closed,
the struct sock::sk_refcnt reaches 0. In the TX-path the CAN stack
clones an "echo" skb, calls sock_hold() on the original socket and
references it. This produces the following back trace:

| WARNING: CPU: 0 PID: 280 at lib/refcount.c:25 refcount_warn_saturate+0x114/0x134
| refcount_t: addition on 0; use-after-free.
| Modules linked in: coda_vpu(E) v4l2_jpeg(E) videobuf2_vmalloc(E) imx_vdoa(E)
| CPU: 0 PID: 280 Comm: test_can.sh Tainted: G            E     5.11.0-04577-gf8ff6603c617 #203
| Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
| Backtrace:
| [<80bafea4>] (dump_backtrace) from [<80bb0280>] (show_stack+0x20/0x24) r7:00000000 r6:600f0113 r5:00000000 r4:81441220
| [<80bb0260>] (show_stack) from [<80bb593c>] (dump_stack+0xa0/0xc8)
| [<80bb589c>] (dump_stack) from [<8012b268>] (__warn+0xd4/0x114) r9:00000019 r8:80f4a8c2 r7:83e4150c r6:00000000 r5:00000009 r4:80528f90
| [<8012b194>] (__warn) from [<80bb09c4>] (warn_slowpath_fmt+0x88/0xc8) r9:83f26400 r8:80f4a8d1 r7:00000009 r6:80528f90 r5:00000019 r4:80f4a8c2
| [<80bb0940>] (warn_slowpath_fmt) from [<80528f90>] (refcount_warn_saturate+0x114/0x134) r8:00000000 r7:00000000 r6:82b44000 r5:834e5600 r4:83f4d540
| [<80528e7c>] (refcount_warn_saturate) from [<8079a4c8>] (__refcount_add.constprop.0+0x4c/0x50)
| [<8079a47c>] (__refcount_add.constprop.0) from [<8079a57c>] (can_put_echo_skb+0xb0/0x13c)
| [<8079a4cc>] (can_put_echo_skb) from [<8079ba98>] (flexcan_start_xmit+0x1c4/0x230) r9:00000010 r8:83f48610 r7:0fdc0000 r6:0c080000 r5:82b44000 r4:834e5600
| [<8079b8d4>] (flexcan_start_xmit) from [<80969078>] (netdev_start_xmit+0x44/0x70) r9:814c0ba0 r8:80c8790c r7:00000000 r6:834e5600 r5:82b44000 r4:82ab1f00
| [<80969034>] (netdev_start_xmit) from [<809725a4>] (dev_hard_start_xmit+0x19c/0x318) r9:814c0ba0 r8:00000000 r7:82ab1f00 r6:82b44000 r5:00000000 r4:834e5600
| [<80972408>] (dev_hard_start_xmit) from [<809c6584>] (sch_direct_xmit+0xcc/0x264) r10:834e5600 r9:00000000 r8:00000000 r7:82b44000 r6:82ab1f00 r5:834e5600 r4:83f27400
| [<809c64b8>] (sch_direct_xmit) from [<809c6c0c>] (__qdisc_run+0x4f0/0x534)

To fix this problem, only set skb ownership to sockets which have still
a ref count > 0.

Fixes: 0ae89beb283a ("can: add destructor for self generated skbs")
Cc: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: Andre Naujoks <nautsch2@gmail.com>
Link: https://lore.kernel.org/r/20210226092456.27126-1-o.rempel@pengutronix.de
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/skb.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index 685f34cfba20..d438eb058069 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -65,8 +65,12 @@ static inline void can_skb_reserve(struct sk_buff *skb)
 
 static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
 {
-	if (sk) {
-		sock_hold(sk);
+	/* If the socket has already been closed by user space, the
+	 * refcount may already be 0 (and the socket will be freed
+	 * after the last TX skb has been freed). So only increase
+	 * socket refcount if the refcount is > 0.
+	 */
+	if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
 		skb->destructor = sock_efree;
 		skb->sk = sk;
 	}

From 2712625200ed69c642b9abc3a403830c4643364c Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Fri, 26 Feb 2021 16:34:41 +0000
Subject: [PATCH 053/164] can: tcan4x5x: tcan4x5x_init(): fix initialization -
 clear MRAM before entering Normal Mode

This patch prevents a potentially destructive race condition. The
device is fully operational on the bus after entering Normal Mode, so
zeroing the MRAM after entering this mode may lead to loss of
information, e.g. new received messages.

This patch fixes the problem by first initializing the MRAM, then
bringing the device into Normale Mode.

Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel")
Link: https://lore.kernel.org/r/20210226163440.313628-1-torin@maxiluxsystems.com
Suggested-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/tcan4x5x-core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index b7caec769ddb..4147cecfbbd6 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -237,14 +237,14 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
 	if (ret)
 		return ret;
 
+	/* Zero out the MCAN buffers */
+	m_can_init_ram(cdev);
+
 	ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
 				 TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL);
 	if (ret)
 		return ret;
 
-	/* Zero out the MCAN buffers */
-	m_can_init_ram(cdev);
-
 	return ret;
 }
 

From 73f476aa1975bae6a792b340f5b26ffcfba869a6 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 26 Feb 2021 17:30:20 +0200
Subject: [PATCH 054/164] net: phy: ti: take into account all possible
 interrupt sources

The previous implementation of .handle_interrupt() did not take into
account the fact that all the interrupt status registers should be
acknowledged since multiple interrupt sources could be asserted.

Fix this by reading all the status registers before exiting with
IRQ_NONE or triggering the PHY state machine.

Fixes: 1d1ae3c6ca3f ("net: phy: ti: implement generic .handle_interrupt() callback")
Reported-by: Sven Schuchmann <schuchmann@schleissheimer.de>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Link: https://lore.kernel.org/r/20210226153020.867852-1-ciorneiioana@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/dp83822.c   |  9 +++++----
 drivers/net/phy/dp83tc811.c | 11 ++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index be1224b4447b..f7a2ec150e54 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -290,6 +290,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
 
 static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
 {
+	bool trigger_machine = false;
 	int irq_status;
 
 	/* The MISR1 and MISR2 registers are holding the interrupt status in
@@ -305,7 +306,7 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 	if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-		goto trigger_machine;
+		trigger_machine = true;
 
 	irq_status = phy_read(phydev, MII_DP83822_MISR2);
 	if (irq_status < 0) {
@@ -313,11 +314,11 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 	if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-		goto trigger_machine;
+		trigger_machine = true;
 
-	return IRQ_NONE;
+	if (!trigger_machine)
+		return IRQ_NONE;
 
-trigger_machine:
 	phy_trigger_machine(phydev);
 
 	return IRQ_HANDLED;
diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c
index 688fadffb249..7ea32fb77190 100644
--- a/drivers/net/phy/dp83tc811.c
+++ b/drivers/net/phy/dp83tc811.c
@@ -264,6 +264,7 @@ static int dp83811_config_intr(struct phy_device *phydev)
 
 static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
 {
+	bool trigger_machine = false;
 	int irq_status;
 
 	/* The INT_STAT registers 1, 2 and 3 are holding the interrupt status
@@ -279,7 +280,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 	if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-		goto trigger_machine;
+		trigger_machine = true;
 
 	irq_status = phy_read(phydev, MII_DP83811_INT_STAT2);
 	if (irq_status < 0) {
@@ -287,7 +288,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 	if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-		goto trigger_machine;
+		trigger_machine = true;
 
 	irq_status = phy_read(phydev, MII_DP83811_INT_STAT3);
 	if (irq_status < 0) {
@@ -295,11 +296,11 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 	if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-		goto trigger_machine;
+		trigger_machine = true;
 
-	return IRQ_NONE;
+	if (!trigger_machine)
+		return IRQ_NONE;
 
-trigger_machine:
 	phy_trigger_machine(phydev);
 
 	return IRQ_HANDLED;

From 826d82170b539f16e1955ab940222543c012044e Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 25 Feb 2021 16:39:01 +0100
Subject: [PATCH 055/164] xen-netback: use local var in xenvif_tx_check_gop()
 instead of re-calculating

shinfo already holds the result of skb_shinfo(skb) at this point - no
need to re-invoke the construct even twice.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e5c73f819662..aff5ac1f002a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -557,8 +557,8 @@ check_frags:
 	}
 
 	if (skb_has_frag_list(skb) && !first_shinfo) {
-		first_shinfo = skb_shinfo(skb);
-		shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
+		first_shinfo = shinfo;
+		shinfo = skb_shinfo(shinfo->frag_list);
 		nr_frags = shinfo->nr_frags;
 
 		goto check_frags;

From 9eb8bc593a5eed167dac2029abef343854c5ba75 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Mon, 1 Mar 2021 01:08:23 +0800
Subject: [PATCH 056/164] net: dsa: tag_rtl4_a: fix egress tags

Commit 86dd9868b878 has several issues, but was accepted too soon
before anyone could take a look.

- Double free. dsa_slave_xmit() will free the skb if the xmit function
  returns NULL, but the skb is already freed by eth_skb_pad(). Use
  __skb_put_padto() to avoid that.
- Unnecessary allocation. It has been done by DSA core since commit
  a3b0b6479700.
- A u16 pointer points to skb data. It should be __be16 for network
  byte order.
- Typo in comments. "numer" -> "number".

Fixes: 86dd9868b878 ("net: dsa: tag_rtl4_a: Support also egress tags")
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_rtl4_a.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index c17d39b4a1a0..e9176475bac8 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -35,14 +35,12 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
+	__be16 *p;
 	u8 *tag;
-	u16 *p;
 	u16 out;
 
 	/* Pad out to at least 60 bytes */
-	if (unlikely(eth_skb_pad(skb)))
-		return NULL;
-	if (skb_cow_head(skb, RTL4_A_HDR_LEN) < 0)
+	if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false)))
 		return NULL;
 
 	netdev_dbg(dev, "add realtek tag to package to port %d\n",
@@ -53,13 +51,13 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
 	tag = skb->data + 2 * ETH_ALEN;
 
 	/* Set Ethertype */
-	p = (u16 *)tag;
+	p = (__be16 *)tag;
 	*p = htons(RTL4_A_ETHERTYPE);
 
 	out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
-	/* The lower bits is the port numer */
+	/* The lower bits is the port number */
 	out |= (u8)dp->index;
-	p = (u16 *)(tag + 2);
+	p = (__be16 *)(tag + 2);
 	*p = htons(out);
 
 	return skb;

From 4372339efc06bc2a796f4cc9d0a7a929dfda4967 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 27 Feb 2021 01:40:19 +0100
Subject: [PATCH 057/164] net: always use icmp{,v6}_ndo_send from
 ndo_start_xmit

There were a few remaining tunnel drivers that didn't receive the prior
conversion to icmp{,v6}_ndo_send. Knowing now that this could lead to
memory corrution (see ee576c47db60 ("net: icmp: pass zeroed opts from
icmp{,v6}_ndo_send before sending") for details), there's even more
imperative to have these all converted. So this commit goes through the
remaining cases that I could find and does a boring translation to the
ndo variety.

The Fixes: line below is the merge that originally added icmp{,v6}_
ndo_send and converted the first batch of icmp{,v6}_send users. The
rationale then for the change applies equally to this patch. It's just
that these drivers were left out of the initial conversion because these
network devices are hiding in net/ rather than in drivers/net/.

Cc: Florian Westphal <fw@strlen.de>
Cc: Willem de Bruijn <willemb@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: David Ahern <dsahern@kernel.org>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Fixes: 803381f9f117 ("Merge branch 'icmp-account-for-NAT-when-sending-icmps-from-ndo-layer'")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c  |  5 ++---
 net/ipv4/ip_vti.c     |  6 +++---
 net/ipv6/ip6_gre.c    | 16 ++++++++--------
 net/ipv6/ip6_tunnel.c | 10 +++++-----
 net/ipv6/ip6_vti.c    |  6 +++---
 net/ipv6/sit.c        |  2 +-
 6 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 76a420c76f16..f6cc26de5ed3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -502,8 +502,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 		if (!skb_is_gso(skb) &&
 		    (inner_iph->frag_off & htons(IP_DF)) &&
 		    mtu < pkt_size) {
-			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 			return -E2BIG;
 		}
 	}
@@ -527,7 +526,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 
 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
 					mtu < pkt_size) {
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			return -E2BIG;
 		}
 	}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index abc171e79d3e..eb207089ece0 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -238,13 +238,13 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 	if (skb->len > mtu) {
 		skb_dst_update_pmtu_no_confirm(skb, mtu);
 		if (skb->protocol == htons(ETH_P_IP)) {
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
+			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				      htonl(mtu));
 		} else {
 			if (mtu < IPV6_MIN_MTU)
 				mtu = IPV6_MIN_MTU;
 
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		}
 
 		dst_release(dst);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c3bc89b6b1a1..1baf43aacb2e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -678,8 +678,8 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
 
 		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
 		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2);
+			icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
+					ICMPV6_HDR_FIELD, offset + 2);
 			return -1;
 		}
 		*encap_limit = tel->encap_limit - 1;
@@ -805,8 +805,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 	if (err != 0) {
 		/* XXX: send ICMP error even if DF is not set. */
 		if (err == -EMSGSIZE)
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
+			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				      htonl(mtu));
 		return -1;
 	}
 
@@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 			  &mtu, skb->protocol);
 	if (err != 0) {
 		if (err == -EMSGSIZE)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		return -1;
 	}
 
@@ -1063,10 +1063,10 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 		/* XXX: send ICMP error even if DF is not set. */
 		if (err == -EMSGSIZE) {
 			if (skb->protocol == htons(ETH_P_IP))
-				icmp_send(skb, ICMP_DEST_UNREACH,
-					  ICMP_FRAG_NEEDED, htonl(mtu));
+				icmp_ndo_send(skb, ICMP_DEST_UNREACH,
+					      ICMP_FRAG_NEEDED, htonl(mtu));
 			else
-				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		}
 
 		goto tx_err;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a7950baa05e5..3fa0eca5a06f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1332,8 +1332,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
 
 				tel = (void *)&skb_network_header(skb)[offset];
 				if (tel->encap_limit == 0) {
-					icmpv6_send(skb, ICMPV6_PARAMPROB,
-						ICMPV6_HDR_FIELD, offset + 2);
+					icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
+							ICMPV6_HDR_FIELD, offset + 2);
 					return -1;
 				}
 				encap_limit = tel->encap_limit - 1;
@@ -1385,11 +1385,11 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (err == -EMSGSIZE)
 			switch (protocol) {
 			case IPPROTO_IPIP:
-				icmp_send(skb, ICMP_DEST_UNREACH,
-					  ICMP_FRAG_NEEDED, htonl(mtu));
+				icmp_ndo_send(skb, ICMP_DEST_UNREACH,
+					      ICMP_FRAG_NEEDED, htonl(mtu));
 				break;
 			case IPPROTO_IPV6:
-				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 				break;
 			default:
 				break;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0225fd694192..f10e7a72ea62 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -521,10 +521,10 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 			if (mtu < IPV6_MIN_MTU)
 				mtu = IPV6_MIN_MTU;
 
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		} else {
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
+			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				      htonl(mtu));
 		}
 
 		err = -EMSGSIZE;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 93636867aee2..63ccd9f2dccc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -987,7 +987,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 			skb_dst_update_pmtu_no_confirm(skb, mtu);
 
 		if (skb->len > mtu && !skb_is_gso(skb)) {
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			ip_rt_put(rt);
 			goto tx_error;
 		}

From d9032dba5a2b2bbf0fdce67c8795300ec9923b43 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Sat, 27 Feb 2021 11:05:58 +0800
Subject: [PATCH 058/164] net: phy: fix save wrong speed and duplex problem if
 autoneg is on

If phy uses generic driver and autoneg is on, enter command
"ethtool -s eth0 speed 50" will not change phy speed actually, but
command "ethtool eth0" shows speed is 50Mb/s because phydev->speed
has been set to 50 and no update later.

And duplex setting has same problem too.

However, if autoneg is on, phy only changes speed and duplex according to
phydev->advertising, but not phydev->speed and phydev->duplex. So in this
case, phydev->speed and phydev->duplex don't need to be set in function
phy_ethtool_ksettings_set() if autoneg is on.

Fixes: 51e2a3846eab ("PHY: Avoid unnecessary aneg restarts")
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 1be07e45d314..fc2e7cb5b2e5 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -276,14 +276,16 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 
 	phydev->autoneg = autoneg;
 
-	phydev->speed = speed;
+	if (autoneg == AUTONEG_DISABLE) {
+		phydev->speed = speed;
+		phydev->duplex = duplex;
+	}
 
 	linkmode_copy(phydev->advertising, advertising);
 
 	linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
 			 phydev->advertising, autoneg == AUTONEG_ENABLE);
 
-	phydev->duplex = duplex;
 	phydev->master_slave_set = cmd->base.master_slave_cfg;
 	phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
 

From 4deb550bc3b698a1f03d0332cde3df154d1b6c1e Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Sat, 27 Feb 2021 16:15:06 -0500
Subject: [PATCH 059/164] atm: eni: dont release is never initialized

label err_eni_release is reachable when eni_start() fail.
In eni_start() it calls dev->phy->start() in the last step, if start()
fail we don't need to call phy->stop(), if start() is never called, we
neither need to call phy->stop(), otherwise null-ptr-deref will happen.

In order to fix this issue, don't call phy->stop() in label err_eni_release

[    4.875714] ==================================================================
[    4.876091] BUG: KASAN: null-ptr-deref in suni_stop+0x47/0x100 [suni]
[    4.876433] Read of size 8 at addr 0000000000000030 by task modprobe/95
[    4.876778]
[    4.876862] CPU: 0 PID: 95 Comm: modprobe Not tainted 5.11.0-rc7-00090-gdcc0b49040c7 #2
[    4.877290] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd94
[    4.877876] Call Trace:
[    4.878009]  dump_stack+0x7d/0xa3
[    4.878191]  kasan_report.cold+0x10c/0x10e
[    4.878410]  ? __slab_free+0x2f0/0x340
[    4.878612]  ? suni_stop+0x47/0x100 [suni]
[    4.878832]  suni_stop+0x47/0x100 [suni]
[    4.879043]  eni_do_release+0x3b/0x70 [eni]
[    4.879269]  eni_init_one.cold+0x1152/0x1747 [eni]
[    4.879528]  ? _raw_spin_lock_irqsave+0x7b/0xd0
[    4.879768]  ? eni_ioctl+0x270/0x270 [eni]
[    4.879990]  ? __mutex_lock_slowpath+0x10/0x10
[    4.880226]  ? eni_ioctl+0x270/0x270 [eni]
[    4.880448]  local_pci_probe+0x6f/0xb0
[    4.880650]  pci_device_probe+0x171/0x240
[    4.880864]  ? pci_device_remove+0xe0/0xe0
[    4.881086]  ? kernfs_create_link+0xb6/0x110
[    4.881315]  ? sysfs_do_create_link_sd.isra.0+0x76/0xe0
[    4.881594]  really_probe+0x161/0x420
[    4.881791]  driver_probe_device+0x6d/0xd0
[    4.882010]  device_driver_attach+0x82/0x90
[    4.882233]  ? device_driver_attach+0x90/0x90
[    4.882465]  __driver_attach+0x60/0x100
[    4.882671]  ? device_driver_attach+0x90/0x90
[    4.882903]  bus_for_each_dev+0xe1/0x140
[    4.883114]  ? subsys_dev_iter_exit+0x10/0x10
[    4.883346]  ? klist_node_init+0x61/0x80
[    4.883557]  bus_add_driver+0x254/0x2a0
[    4.883764]  driver_register+0xd3/0x150
[    4.883971]  ? 0xffffffffc0038000
[    4.884149]  do_one_initcall+0x84/0x250
[    4.884355]  ? trace_event_raw_event_initcall_finish+0x150/0x150
[    4.884674]  ? unpoison_range+0xf/0x30
[    4.884875]  ? ____kasan_kmalloc.constprop.0+0x84/0xa0
[    4.885150]  ? unpoison_range+0xf/0x30
[    4.885352]  ? unpoison_range+0xf/0x30
[    4.885557]  do_init_module+0xf8/0x350
[    4.885760]  load_module+0x3fe6/0x4340
[    4.885960]  ? vm_unmap_ram+0x1d0/0x1d0
[    4.886166]  ? ____kasan_kmalloc.constprop.0+0x84/0xa0
[    4.886441]  ? module_frob_arch_sections+0x20/0x20
[    4.886697]  ? __do_sys_finit_module+0x108/0x170
[    4.886941]  __do_sys_finit_module+0x108/0x170
[    4.887178]  ? __ia32_sys_init_module+0x40/0x40
[    4.887419]  ? file_open_root+0x200/0x200
[    4.887634]  ? do_sys_open+0x85/0xe0
[    4.887826]  ? filp_open+0x50/0x50
[    4.888009]  ? fpregs_assert_state_consistent+0x4d/0x60
[    4.888287]  ? exit_to_user_mode_prepare+0x2f/0x130
[    4.888547]  do_syscall_64+0x33/0x40
[    4.888739]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[    4.889010] RIP: 0033:0x7ff62fcf1cf7
[    4.889202] Code: 48 89 57 30 48 8b 04 24 48 89 47 38 e9 1d a0 02 00 48 89 f8 48 89 f71
[    4.890172] RSP: 002b:00007ffe6644ade8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[    4.890570] RAX: ffffffffffffffda RBX: 0000000000f2ca70 RCX: 00007ff62fcf1cf7
[    4.890944] RDX: 0000000000000000 RSI: 0000000000f2b9e0 RDI: 0000000000000003
[    4.891318] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000001
[    4.891691] R10: 00007ff62fd55300 R11: 0000000000000246 R12: 0000000000f2b9e0
[    4.892064] R13: 0000000000000000 R14: 0000000000f2bdd0 R15: 0000000000000001
[    4.892439] ==================================================================

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/eni.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 316a9947541f..b574cce98dc3 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -2260,7 +2260,8 @@ out:
 	return rc;
 
 err_eni_release:
-	eni_do_release(dev);
+	dev->phy = NULL;
+	iounmap(ENI_DEV(dev)->ioaddr);
 err_unregister:
 	atm_dev_deregister(dev);
 err_free_consistent:

From a2bd45834e83d6c5a04d397bde13d744a4812dfc Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Sat, 27 Feb 2021 22:55:50 -0500
Subject: [PATCH 060/164] atm: lanai: dont run lanai_dev_close if not open

lanai_dev_open() can fail. When it fail, lanai->base is unmapped and the
pci device is disabled. The caller, lanai_init_one(), then tries to run
atm_dev_deregister(). This will subsequently call lanai_dev_close() and
use the already released MMIO area.

To fix this issue, set the lanai->base to NULL if open fail,
and test the flag in lanai_dev_close().

[    8.324153] lanai: lanai_start() failed, err=19
[    8.324819] lanai(itf 0): shutting down interface
[    8.325211] BUG: unable to handle page fault for address: ffffc90000180024
[    8.325781] #PF: supervisor write access in kernel mode
[    8.326215] #PF: error_code(0x0002) - not-present page
[    8.326641] PGD 100000067 P4D 100000067 PUD 100139067 PMD 10013a067 PTE 0
[    8.327206] Oops: 0002 [#1] SMP KASAN NOPTI
[    8.327557] CPU: 0 PID: 95 Comm: modprobe Not tainted 5.11.0-rc7-00090-gdcc0b49040c7 #12
[    8.328229] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-4
[    8.329145] RIP: 0010:lanai_dev_close+0x4f/0xe5 [lanai]
[    8.329587] Code: 00 48 c7 c7 00 d3 01 c0 e8 49 4e 0a c2 48 8d bd 08 02 00 00 e8 6e 52 14 c1 48 80
[    8.330917] RSP: 0018:ffff8881029ef680 EFLAGS: 00010246
[    8.331196] RAX: 000000000003fffe RBX: ffff888102fb4800 RCX: ffffffffc001a98a
[    8.331572] RDX: ffffc90000180000 RSI: 0000000000000246 RDI: ffff888102fb4000
[    8.331948] RBP: ffff888102fb4000 R08: ffffffff8115da8a R09: ffffed102053deaa
[    8.332326] R10: 0000000000000003 R11: ffffed102053dea9 R12: ffff888102fb48a4
[    8.332701] R13: ffffffffc00123c0 R14: ffff888102fb4b90 R15: ffff888102fb4b88
[    8.333077] FS:  00007f08eb9056a0(0000) GS:ffff88815b400000(0000) knlGS:0000000000000000
[    8.333502] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    8.333806] CR2: ffffc90000180024 CR3: 0000000102a28000 CR4: 00000000000006f0
[    8.334182] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    8.334557] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    8.334932] Call Trace:
[    8.335066]  atm_dev_deregister+0x161/0x1a0 [atm]
[    8.335324]  lanai_init_one.cold+0x20c/0x96d [lanai]
[    8.335594]  ? lanai_send+0x2a0/0x2a0 [lanai]
[    8.335831]  local_pci_probe+0x6f/0xb0
[    8.336039]  pci_device_probe+0x171/0x240
[    8.336255]  ? pci_device_remove+0xe0/0xe0
[    8.336475]  ? kernfs_create_link+0xb6/0x110
[    8.336704]  ? sysfs_do_create_link_sd.isra.0+0x76/0xe0
[    8.336983]  really_probe+0x161/0x420
[    8.337181]  driver_probe_device+0x6d/0xd0
[    8.337401]  device_driver_attach+0x82/0x90
[    8.337626]  ? device_driver_attach+0x90/0x90
[    8.337859]  __driver_attach+0x60/0x100
[    8.338065]  ? device_driver_attach+0x90/0x90
[    8.338298]  bus_for_each_dev+0xe1/0x140
[    8.338511]  ? subsys_dev_iter_exit+0x10/0x10
[    8.338745]  ? klist_node_init+0x61/0x80
[    8.338956]  bus_add_driver+0x254/0x2a0
[    8.339164]  driver_register+0xd3/0x150
[    8.339370]  ? 0xffffffffc0028000
[    8.339550]  do_one_initcall+0x84/0x250
[    8.339755]  ? trace_event_raw_event_initcall_finish+0x150/0x150
[    8.340076]  ? free_vmap_area_noflush+0x1a5/0x5c0
[    8.340329]  ? unpoison_range+0xf/0x30
[    8.340532]  ? ____kasan_kmalloc.constprop.0+0x84/0xa0
[    8.340806]  ? unpoison_range+0xf/0x30
[    8.341014]  ? unpoison_range+0xf/0x30
[    8.341217]  do_init_module+0xf8/0x350
[    8.341419]  load_module+0x3fe6/0x4340
[    8.341621]  ? vm_unmap_ram+0x1d0/0x1d0
[    8.341826]  ? ____kasan_kmalloc.constprop.0+0x84/0xa0
[    8.342101]  ? module_frob_arch_sections+0x20/0x20
[    8.342358]  ? __do_sys_finit_module+0x108/0x170
[    8.342604]  __do_sys_finit_module+0x108/0x170
[    8.342841]  ? __ia32_sys_init_module+0x40/0x40
[    8.343083]  ? file_open_root+0x200/0x200
[    8.343298]  ? do_sys_open+0x85/0xe0
[    8.343491]  ? filp_open+0x50/0x50
[    8.343675]  ? exit_to_user_mode_prepare+0xfc/0x130
[    8.343935]  do_syscall_64+0x33/0x40
[    8.344132]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[    8.344401] RIP: 0033:0x7f08eb887cf7
[    8.344594] Code: 48 89 57 30 48 8b 04 24 48 89 47 38 e9 1d a0 02 00 48 89 f8 48 89 f7 48 89 d6 41
[    8.345565] RSP: 002b:00007ffcd5c98ad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[    8.345962] RAX: ffffffffffffffda RBX: 00000000008fea70 RCX: 00007f08eb887cf7
[    8.346336] RDX: 0000000000000000 RSI: 00000000008fd9e0 RDI: 0000000000000003
[    8.346711] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000001
[    8.347085] R10: 00007f08eb8eb300 R11: 0000000000000246 R12: 00000000008fd9e0
[    8.347460] R13: 0000000000000000 R14: 00000000008fddd0 R15: 0000000000000001
[    8.347836] Modules linked in: lanai(+) atm
[    8.348065] CR2: ffffc90000180024
[    8.348244] ---[ end trace 7fdc1c668f2003e5 ]---
[    8.348490] RIP: 0010:lanai_dev_close+0x4f/0xe5 [lanai]
[    8.348772] Code: 00 48 c7 c7 00 d3 01 c0 e8 49 4e 0a c2 48 8d bd 08 02 00 00 e8 6e 52 14 c1 48 80
[    8.349745] RSP: 0018:ffff8881029ef680 EFLAGS: 00010246
[    8.350022] RAX: 000000000003fffe RBX: ffff888102fb4800 RCX: ffffffffc001a98a
[    8.350397] RDX: ffffc90000180000 RSI: 0000000000000246 RDI: ffff888102fb4000
[    8.350772] RBP: ffff888102fb4000 R08: ffffffff8115da8a R09: ffffed102053deaa
[    8.351151] R10: 0000000000000003 R11: ffffed102053dea9 R12: ffff888102fb48a4
[    8.351525] R13: ffffffffc00123c0 R14: ffff888102fb4b90 R15: ffff888102fb4b88
[    8.351918] FS:  00007f08eb9056a0(0000) GS:ffff88815b400000(0000) knlGS:0000000000000000
[    8.352343] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    8.352647] CR2: ffffc90000180024 CR3: 0000000102a28000 CR4: 00000000000006f0
[    8.353022] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    8.353397] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    8.353958] modprobe (95) used greatest stack depth: 26216 bytes left

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/lanai.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index d7277c26e423..32d7aa141d96 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -2233,6 +2233,7 @@ static int lanai_dev_open(struct atm_dev *atmdev)
 	conf1_write(lanai);
 #endif
 	iounmap(lanai->base);
+	lanai->base = NULL;
     error_pci:
 	pci_disable_device(lanai->pci);
     error:
@@ -2245,6 +2246,8 @@ static int lanai_dev_open(struct atm_dev *atmdev)
 static void lanai_dev_close(struct atm_dev *atmdev)
 {
 	struct lanai_dev *lanai = (struct lanai_dev *) atmdev->dev_data;
+	if (lanai->base==NULL)
+		return;
 	printk(KERN_INFO DEV_LABEL "(itf %d): shutting down interface\n",
 	    lanai->number);
 	lanai_timed_poll_stop(lanai);
@@ -2552,7 +2555,7 @@ static int lanai_init_one(struct pci_dev *pci,
 	struct atm_dev *atmdev;
 	int result;
 
-	lanai = kmalloc(sizeof(*lanai), GFP_KERNEL);
+	lanai = kzalloc(sizeof(*lanai), GFP_KERNEL);
 	if (lanai == NULL) {
 		printk(KERN_ERR DEV_LABEL
 		       ": couldn't allocate dev_data structure!\n");

From 8c91bc3d44dfef8284af384877fbe61117e8b7d1 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Sun, 28 Feb 2021 23:25:43 +0300
Subject: [PATCH 061/164] sh_eth: fix TRSCER mask for SH771x

According  to  the SH7710, SH7712, SH7713 Group User's Manual: Hardware,
Rev. 3.00, the TRSCER register actually has only bit 7 valid (and named
differently), with all the other bits reserved. Apparently, this was not
the case with some early revisions of the manual as we have the other
bits declared (and set) in the original driver.  Follow the suit and add
the explicit sh_eth_cpu_data::trscer_err_mask initializer for SH771x...

Fixes: 86a74ff21a7a ("net: sh_eth: add support for Renesas SuperH Ethernet")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 590b088bc4c7..e79bb0a3ced5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1089,6 +1089,9 @@ static struct sh_eth_cpu_data sh771x_data = {
 			  EESIPR_CEEFIP | EESIPR_CELFIP |
 			  EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
 			  EESIPR_PREIP | EESIPR_CERFIP,
+
+	.trscer_err_mask = DESC_I_RINT8,
+
 	.tsu		= 1,
 	.dual_port	= 1,
 };

From 75be7fb7f978202c4c3a1a713af4485afb2ff5f6 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Sun, 28 Feb 2021 23:26:34 +0300
Subject: [PATCH 062/164] sh_eth: fix TRSCER mask for R7S72100

According  to  the RZ/A1H Group, RZ/A1M Group User's Manual: Hardware,
Rev. 4.00, the TRSCER register has bit 9 reserved, hence we can't use
the driver's default TRSCER mask.  Add the explicit initializer for
sh_eth_cpu_data::trscer_err_mask for R7S72100.

Fixes: db893473d313 ("sh_eth: Add support for r7s72100")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index e79bb0a3ced5..7f14d4aa5b3e 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -560,6 +560,8 @@ static struct sh_eth_cpu_data r7s72100_data = {
 			  EESR_TDE,
 	.fdr_value	= 0x0000070f,
 
+	.trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+
 	.no_psr		= 1,
 	.apr		= 1,
 	.mpr		= 1,

From 165bc5a4f30eee4735845aa7dbd6b738643f2603 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Sun, 28 Feb 2021 23:27:32 +0300
Subject: [PATCH 063/164] sh_eth: fix TRSCER mask for R7S9210

According  to the RZ/A2M Group User's Manual: Hardware, Rev. 2.00,
the TRSCER register has bit 9 reserved, hence we can't use the driver's
default TRSCER mask.  Add the explicit initializer for sh_eth_cpu_data::
trscer_err_mask for R7S9210.

Fixes: 6e0bb04d0e4f ("sh_eth: Add R7S9210 support")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 7f14d4aa5b3e..f029c7c03804 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -782,6 +782,8 @@ static struct sh_eth_cpu_data r7s9210_data = {
 
 	.fdr_value	= 0x0000070f,
 
+	.trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+
 	.apr		= 1,
 	.mpr		= 1,
 	.tpauser	= 1,

From 093b036aa94e01a0bea31a38d7f0ee28a2749023 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 1 Mar 2021 02:22:40 +0300
Subject: [PATCH 064/164] net/qrtr: fix __netdev_alloc_skb call

syzbot found WARNING in __alloc_pages_nodemask()[1] when order >= MAX_ORDER.
It was caused by a huge length value passed from userspace to qrtr_tun_write_iter(),
which tries to allocate skb. Since the value comes from the untrusted source
there is no need to raise a warning in __alloc_pages_nodemask().

[1] WARNING in __alloc_pages_nodemask+0x5f8/0x730 mm/page_alloc.c:5014
Call Trace:
 __alloc_pages include/linux/gfp.h:511 [inline]
 __alloc_pages_node include/linux/gfp.h:524 [inline]
 alloc_pages_node include/linux/gfp.h:538 [inline]
 kmalloc_large_node+0x60/0x110 mm/slub.c:3999
 __kmalloc_node_track_caller+0x319/0x3f0 mm/slub.c:4496
 __kmalloc_reserve net/core/skbuff.c:150 [inline]
 __alloc_skb+0x4e4/0x5a0 net/core/skbuff.c:210
 __netdev_alloc_skb+0x70/0x400 net/core/skbuff.c:446
 netdev_alloc_skb include/linux/skbuff.h:2832 [inline]
 qrtr_endpoint_post+0x84/0x11b0 net/qrtr/qrtr.c:442
 qrtr_tun_write_iter+0x11f/0x1a0 net/qrtr/tun.c:98
 call_write_iter include/linux/fs.h:1901 [inline]
 new_sync_write+0x426/0x650 fs/read_write.c:518
 vfs_write+0x791/0xa30 fs/read_write.c:605
 ksys_write+0x12d/0x250 fs/read_write.c:658
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Reported-by: syzbot+80dccaee7c6630fa9dcf@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Acked-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index b34358282f37..82d2eb8c21d1 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -439,7 +439,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 	if (len == 0 || len & 3)
 		return -EINVAL;
 
-	skb = netdev_alloc_skb(NULL, len);
+	skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
 	if (!skb)
 		return -ENOMEM;
 

From 8bd2a05527349c8627d2b9795d3c7a6f76033676 Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Mon, 1 Mar 2021 14:05:48 +0800
Subject: [PATCH 065/164] inetpeer: use div64_ul() and clamp_val() calculate
 inet_peer_threshold

In inet_initpeers(), struct inet_peer on IA32 uses 128 bytes in nowdays.
Get rid of the cascade and use div64_ul() and clamp_val() calculate that
will not need to be adjusted in the future as suggested by Eric Dumazet.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ff327a62c9ce..da21dfce24d7 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(inet_peer_base_init);
 #define PEER_MAX_GC 32
 
 /* Exported for sysctl_net_ipv4.  */
-int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
+int inet_peer_threshold __read_mostly;	/* start to throw entries more
 					 * aggressively at this stage */
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
@@ -73,20 +73,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
-	struct sysinfo si;
+	u64 nr_entries;
 
-	/* Use the straight interface to information about memory. */
-	si_meminfo(&si);
-	/* The values below were suggested by Alexey Kuznetsov
-	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
-	 * myself.  --SAW
-	 */
-	if (si.totalram <= (32768*1024)/PAGE_SIZE)
-		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
-	if (si.totalram <= (16384*1024)/PAGE_SIZE)
-		inet_peer_threshold >>= 1; /* about 512KB */
-	if (si.totalram <= (8192*1024)/PAGE_SIZE)
-		inet_peer_threshold >>= 2; /* about 128KB */
+	 /* 1% of physical memory */
+	nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT,
+			      100 * L1_CACHE_ALIGN(sizeof(struct inet_peer)));
+
+	inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),

From c646d10dda2dcde82c6ce5a474522621ab2b8b19 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:11 +0200
Subject: [PATCH 066/164] net: enetc: don't overwrite the RSS indirection table
 when initializing

After the blamed patch, all RX traffic gets hashed to CPU 0 because the
hashing indirection table set up in:

enetc_pf_probe
-> enetc_alloc_si_resources
   -> enetc_configure_si
      -> enetc_setup_default_rss_table

is overwritten later in:

enetc_pf_probe
-> enetc_init_port_rss_memory

which zero-initializes the entire port RSS table in order to avoid ECC errors.

The trouble really is that enetc_init_port_rss_memory really neads
enetc_alloc_si_resources to be called, because it depends upon
enetc_alloc_cbdr and enetc_setup_cbdr. But that whole enetc_configure_si
thing could have been better thought out, it has nothing to do in a
function called "alloc_si_resources", especially since its counterpart,
"free_si_resources", does nothing to unwind the configuration of the SI.

The point is, we need to pull out enetc_configure_si out of
enetc_alloc_resources, and move it after enetc_init_port_rss_memory.
This allows us to set up the default RSS indirection table after
initializing the memory.

Fixes: 07bf34a50e32 ("net: enetc: initialize the RFS and RSS memories")
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c    | 11 +++--------
 drivers/net/ethernet/freescale/enetc/enetc.h    |  1 +
 drivers/net/ethernet/freescale/enetc/enetc_pf.c |  7 +++++++
 drivers/net/ethernet/freescale/enetc/enetc_vf.c |  7 +++++++
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index c78d12229730..fdb6b9e8da78 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1058,13 +1058,12 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
 	return 0;
 }
 
-static int enetc_configure_si(struct enetc_ndev_priv *priv)
+int enetc_configure_si(struct enetc_ndev_priv *priv)
 {
 	struct enetc_si *si = priv->si;
 	struct enetc_hw *hw = &si->hw;
 	int err;
 
-	enetc_setup_cbdr(hw, &si->cbd_ring);
 	/* set SI cache attributes */
 	enetc_wr(hw, ENETC_SICAR0,
 		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
@@ -1112,6 +1111,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 	if (err)
 		return err;
 
+	enetc_setup_cbdr(&si->hw, &si->cbd_ring);
+
 	priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules),
 				  GFP_KERNEL);
 	if (!priv->cls_rules) {
@@ -1119,14 +1120,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 		goto err_alloc_cls;
 	}
 
-	err = enetc_configure_si(priv);
-	if (err)
-		goto err_config_si;
-
 	return 0;
 
-err_config_si:
-	kfree(priv->cls_rules);
 err_alloc_cls:
 	enetc_clear_cbdr(&si->hw);
 	enetc_free_cbdr(priv->dev, &si->cbd_ring);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 8532d23b54f5..f8275cef3b5c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -292,6 +292,7 @@ void enetc_get_si_caps(struct enetc_si *si);
 void enetc_init_si_rings_params(struct enetc_ndev_priv *priv);
 int enetc_alloc_si_resources(struct enetc_ndev_priv *priv);
 void enetc_free_si_resources(struct enetc_ndev_priv *priv);
+int enetc_configure_si(struct enetc_ndev_priv *priv);
 
 int enetc_open(struct net_device *ndev);
 int enetc_close(struct net_device *ndev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 515c5b29d7aa..d02ecb2e46ae 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1108,6 +1108,12 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_init_port_rss;
 	}
 
+	err = enetc_configure_si(priv);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to configure SI\n");
+		goto err_config_si;
+	}
+
 	err = enetc_alloc_msix(priv);
 	if (err) {
 		dev_err(&pdev->dev, "MSIX alloc failed\n");
@@ -1136,6 +1142,7 @@ err_phylink_create:
 	enetc_mdiobus_destroy(pf);
 err_mdiobus_create:
 	enetc_free_msix(priv);
+err_config_si:
 err_init_port_rss:
 err_init_port_rfs:
 err_alloc_msix:
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 39c1a09e69a9..9b755a84c2d6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -171,6 +171,12 @@ static int enetc_vf_probe(struct pci_dev *pdev,
 		goto err_alloc_si_res;
 	}
 
+	err = enetc_configure_si(priv);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to configure SI\n");
+		goto err_config_si;
+	}
+
 	err = enetc_alloc_msix(priv);
 	if (err) {
 		dev_err(&pdev->dev, "MSIX alloc failed\n");
@@ -187,6 +193,7 @@ static int enetc_vf_probe(struct pci_dev *pdev,
 
 err_reg_netdev:
 	enetc_free_msix(priv);
+err_config_si:
 err_alloc_msix:
 	enetc_free_si_resources(priv);
 err_alloc_si_res:

From 3222b5b613db558e9a494bbf53f3c984d90f71ea Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:12 +0200
Subject: [PATCH 067/164] net: enetc: initialize RFS/RSS memories for unused
 ports too

Michael reports that since linux-next-20210211, the AER messages for ECC
errors have started reappearing, and this time they can be reliably
reproduced with the first ping on one of his LS1028A boards.

$ ping 1[   33.258069] pcieport 0000:00:1f.0: AER: Multiple Corrected error received: 0000:00:00.0
72.16.0.1
PING [   33.267050] pcieport 0000:00:1f.0: AER: can't find device of ID0000
172.16.0.1 (172.16.0.1): 56 data bytes
64 bytes from 172.16.0.1: seq=0 ttl=64 time=17.124 ms
64 bytes from 172.16.0.1: seq=1 ttl=64 time=0.273 ms

$ devmem 0x1f8010e10 32
0xC0000006

It isn't clear why this is necessary, but it seems that for the errors
to go away, we must clear the entire RFS and RSS memory, not just for
the ports in use.

Sadly the code is structured in such a way that we can't have unified
logic for the used and unused ports. For the minimal initialization of
an unused port, we need just to enable and ioremap the PF memory space,
and a control buffer descriptor ring. Unused ports must then free the
CBDR because the driver will exit, but used ports can not pick up from
where that code path left, since the CBDR API does not reinitialize a
ring when setting it up, so its producer and consumer indices are out of
sync between the software and hardware state. So a separate
enetc_init_unused_port function was created, and it gets called right
after the PF memory space is enabled.

Fixes: 07bf34a50e32 ("net: enetc: initialize the RFS and RSS memories")
Reported-by: Michael Walle <michael@walle.cc>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Michael Walle <michael@walle.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c  |  8 ++---
 drivers/net/ethernet/freescale/enetc/enetc.h  |  4 +++
 .../net/ethernet/freescale/enetc/enetc_pf.c   | 33 ++++++++++++++++---
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index fdb6b9e8da78..eb45830a1667 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -984,7 +984,7 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv)
 		enetc_free_tx_ring(priv->tx_ring[i]);
 }
 
-static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 {
 	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
@@ -1005,7 +1005,7 @@ static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 	return 0;
 }
 
-static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 {
 	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
@@ -1013,7 +1013,7 @@ static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 	cbdr->bd_base = NULL;
 }
 
-static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
+void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
 {
 	/* set CBDR cache attributes */
 	enetc_wr(hw, ENETC_SICAR2,
@@ -1033,7 +1033,7 @@ static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
 	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
 }
 
-static void enetc_clear_cbdr(struct enetc_hw *hw)
+void enetc_clear_cbdr(struct enetc_hw *hw)
 {
 	enetc_wr(hw, ENETC_SICBDRMR, 0);
 }
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index f8275cef3b5c..8b380fc13314 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -310,6 +310,10 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 void enetc_set_ethtool_ops(struct net_device *ndev);
 
 /* control buffer descriptor ring (CBDR) */
+int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
+void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
+void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr);
+void enetc_clear_cbdr(struct enetc_hw *hw);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 			    char *mac_addr, int si_map);
 int enetc_clear_mac_flt_entry(struct enetc_si *si, int index);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index d02ecb2e46ae..62ba4bf56f0d 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1041,6 +1041,26 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 	return err;
 }
 
+static void enetc_init_unused_port(struct enetc_si *si)
+{
+	struct device *dev = &si->pdev->dev;
+	struct enetc_hw *hw = &si->hw;
+	int err;
+
+	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
+	err = enetc_alloc_cbdr(dev, &si->cbd_ring);
+	if (err)
+		return;
+
+	enetc_setup_cbdr(hw, &si->cbd_ring);
+
+	enetc_init_port_rfs_memory(si);
+	enetc_init_port_rss_memory(si);
+
+	enetc_clear_cbdr(hw);
+	enetc_free_cbdr(dev, &si->cbd_ring);
+}
+
 static int enetc_pf_probe(struct pci_dev *pdev,
 			  const struct pci_device_id *ent)
 {
@@ -1051,11 +1071,6 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	struct enetc_pf *pf;
 	int err;
 
-	if (node && !of_device_is_available(node)) {
-		dev_info(&pdev->dev, "device is disabled, skipping\n");
-		return -ENODEV;
-	}
-
 	err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
 	if (err) {
 		dev_err(&pdev->dev, "PCI probing failed\n");
@@ -1069,6 +1084,13 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_map_pf_space;
 	}
 
+	if (node && !of_device_is_available(node)) {
+		enetc_init_unused_port(si);
+		dev_info(&pdev->dev, "device is disabled, skipping\n");
+		err = -ENODEV;
+		goto err_device_disabled;
+	}
+
 	pf = enetc_si_priv(si);
 	pf->si = si;
 	pf->total_vfs = pci_sriov_get_totalvfs(pdev);
@@ -1151,6 +1173,7 @@ err_alloc_si_res:
 	si->ndev = NULL;
 	free_netdev(ndev);
 err_alloc_netdev:
+err_device_disabled:
 err_map_pf_space:
 	enetc_pci_remove(pdev);
 

From 6d36ecdbc4410e61a0e02adc5d3abeee22a8ffd3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:13 +0200
Subject: [PATCH 068/164] net: enetc: take the MDIO lock only once per NAPI
 poll cycle

The workaround for the ENETC MDIO erratum caused a performance
degradation of 82 Kpps (seen with IP forwarding of two 1Gbps streams of
64B packets). This is due to excessive locking and unlocking in the fast
path, which can be avoided.

By taking the MDIO read-side lock only once per NAPI poll cycle, we are
able to regain 54 Kpps (65%) of the performance hit. The rest of the
performance degradation comes from the TX data path, but unfortunately
it doesn't look like we can optimize that away easily, even with
netdev_xmit_more(), there just isn't any skb batching done, to help with
taking the MDIO lock less often than once per packet.

We need to change the register accessor type for enetc_get_tx_tstamp,
because it now runs under the enetc_lock_mdio as per the new call path
detailed below:

enetc_msix
-> napi_schedule
   -> enetc_poll
      -> enetc_lock_mdio
      -> enetc_clean_tx_ring
         -> enetc_get_tx_tstamp
      -> enetc_clean_rx_ring
      -> enetc_unlock_mdio

Fixes: fd5736bf9f23 ("enetc: Workaround for MDIO register access issue")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c  | 31 ++++++-------------
 .../net/ethernet/freescale/enetc/enetc_hw.h   |  2 ++
 2 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index eb45830a1667..9bcceb74fb9c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -281,6 +281,8 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 	int work_done;
 	int i;
 
+	enetc_lock_mdio();
+
 	for (i = 0; i < v->count_tx_rings; i++)
 		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
 			complete = false;
@@ -291,8 +293,10 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 	if (work_done)
 		v->rx_napi_work = true;
 
-	if (!complete)
+	if (!complete) {
+		enetc_unlock_mdio();
 		return budget;
+	}
 
 	napi_complete_done(napi, work_done);
 
@@ -301,8 +305,6 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 
 	v->rx_napi_work = false;
 
-	enetc_lock_mdio();
-
 	/* enable interrupts */
 	enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
 
@@ -327,8 +329,8 @@ static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
 {
 	u32 lo, hi, tstamp_lo;
 
-	lo = enetc_rd(hw, ENETC_SICTR0);
-	hi = enetc_rd(hw, ENETC_SICTR1);
+	lo = enetc_rd_hot(hw, ENETC_SICTR0);
+	hi = enetc_rd_hot(hw, ENETC_SICTR1);
 	tstamp_lo = le32_to_cpu(txbd->wb.tstamp);
 	if (lo <= tstamp_lo)
 		hi -= 1;
@@ -358,9 +360,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 	i = tx_ring->next_to_clean;
 	tx_swbd = &tx_ring->tx_swbd[i];
 
-	enetc_lock_mdio();
 	bds_to_clean = enetc_bd_ready_count(tx_ring, i);
-	enetc_unlock_mdio();
 
 	do_tstamp = false;
 
@@ -403,8 +403,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 			tx_swbd = tx_ring->tx_swbd;
 		}
 
-		enetc_lock_mdio();
-
 		/* BD iteration loop end */
 		if (is_eof) {
 			tx_frm_cnt++;
@@ -415,8 +413,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 
 		if (unlikely(!bds_to_clean))
 			bds_to_clean = enetc_bd_ready_count(tx_ring, i);
-
-		enetc_unlock_mdio();
 	}
 
 	tx_ring->next_to_clean = i;
@@ -660,8 +656,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 		u32 bd_status;
 		u16 size;
 
-		enetc_lock_mdio();
-
 		if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
 			int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
 
@@ -672,19 +666,15 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		rxbd = enetc_rxbd(rx_ring, i);
 		bd_status = le32_to_cpu(rxbd->r.lstatus);
-		if (!bd_status) {
-			enetc_unlock_mdio();
+		if (!bd_status)
 			break;
-		}
 
 		enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
 		dma_rmb(); /* for reading other rxbd fields */
 		size = le16_to_cpu(rxbd->r.buf_len);
 		skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
-		if (!skb) {
-			enetc_unlock_mdio();
+		if (!skb)
 			break;
-		}
 
 		enetc_get_offloads(rx_ring, rxbd, skb);
 
@@ -696,7 +686,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		if (unlikely(bd_status &
 			     ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
-			enetc_unlock_mdio();
 			dev_kfree_skb(skb);
 			while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
 				dma_rmb();
@@ -736,8 +725,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		enetc_process_skb(rx_ring, skb);
 
-		enetc_unlock_mdio();
-
 		napi_gro_receive(napi, skb);
 
 		rx_frm_cnt++;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index c71fe8d751d5..8b54562f5da6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -453,6 +453,8 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg)
 #define enetc_wr_reg(reg, val)		_enetc_wr_reg_wa((reg), (val))
 #define enetc_rd(hw, off)		enetc_rd_reg((hw)->reg + (off))
 #define enetc_wr(hw, off, val)		enetc_wr_reg((hw)->reg + (off), val)
+#define enetc_rd_hot(hw, off)		enetc_rd_reg_hot((hw)->reg + (off))
+#define enetc_wr_hot(hw, off, val)	enetc_wr_reg_hot((hw)->reg + (off), val)
 #define enetc_rd64(hw, off)		_enetc_rd_reg64_wa((hw)->reg + (off))
 /* port register accessors - PF only */
 #define enetc_port_rd(hw, off)		enetc_rd_reg((hw)->port + (off))

From 827b6fd046516af605e190c872949f22208b5d41 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:14 +0200
Subject: [PATCH 069/164] net: enetc: fix incorrect TPID when receiving 802.1ad
 tagged packets

When the enetc ports have rx-vlan-offload enabled, they report a TPID of
ETH_P_8021Q regardless of what was actually in the packet. When
rx-vlan-offload is disabled, packets have the proper TPID. Fix this
inconsistency by finishing the TODO left in the code.

Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c  | 34 ++++++++++++++-----
 .../net/ethernet/freescale/enetc/enetc_hw.h   |  3 ++
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 9bcceb74fb9c..8ddf0cdc37a5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -523,9 +523,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev,
 static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 			       union enetc_rx_bd *rxbd, struct sk_buff *skb)
 {
-#ifdef CONFIG_FSL_ENETC_PTP_CLOCK
 	struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
-#endif
+
 	/* TODO: hashing */
 	if (rx_ring->ndev->features & NETIF_F_RXCSUM) {
 		u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum);
@@ -534,12 +533,31 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 
-	/* copy VLAN to skb, if one is extracted, for now we assume it's a
-	 * standard TPID, but HW also supports custom values
-	 */
-	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-				       le16_to_cpu(rxbd->r.vlan_opt));
+	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) {
+		__be16 tpid = 0;
+
+		switch (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TPID) {
+		case 0:
+			tpid = htons(ETH_P_8021Q);
+			break;
+		case 1:
+			tpid = htons(ETH_P_8021AD);
+			break;
+		case 2:
+			tpid = htons(enetc_port_rd(&priv->si->hw,
+						   ENETC_PCVLANR1));
+			break;
+		case 3:
+			tpid = htons(enetc_port_rd(&priv->si->hw,
+						   ENETC_PCVLANR2));
+			break;
+		default:
+			break;
+		}
+
+		__vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt));
+	}
+
 #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
 	if (priv->active_offloads & ENETC_F_RX_TSTAMP)
 		enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 8b54562f5da6..a62604a1e54e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -172,6 +172,8 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PSIPMAR0(n)	(0x0100 + (n) * 0x8) /* n = SI index */
 #define ENETC_PSIPMAR1(n)	(0x0104 + (n) * 0x8)
 #define ENETC_PVCLCTR		0x0208
+#define ENETC_PCVLANR1		0x0210
+#define ENETC_PCVLANR2		0x0214
 #define ENETC_VLAN_TYPE_C	BIT(0)
 #define ENETC_VLAN_TYPE_S	BIT(1)
 #define ENETC_PVCLCTR_OVTPIDL(bmp)	((bmp) & 0xff) /* VLAN_TYPE */
@@ -570,6 +572,7 @@ union enetc_rx_bd {
 #define ENETC_RXBD_LSTATUS(flags)	((flags) << 16)
 #define ENETC_RXBD_FLAG_VLAN	BIT(9)
 #define ENETC_RXBD_FLAG_TSTMP	BIT(10)
+#define ENETC_RXBD_FLAG_TPID	GENMASK(1, 0)
 
 #define ENETC_MAC_ADDR_FILT_CNT	8 /* # of supported entries per port */
 #define EMETC_MAC_ADDR_FILT_RES	3 /* # of reserved entries at the beginning */

From a74dbce9d4541888fe0d39afe69a3a95004669b4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:15 +0200
Subject: [PATCH 070/164] net: enetc: don't disable VLAN filtering in
 IFF_PROMISC mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Quoting from the blamed commit:

    In promiscuous mode, it is more intuitive that all traffic is received,
    including VLAN tagged traffic. It appears that it is necessary to set
    the flag in PSIPVMR for that to be the case, so VLAN promiscuous mode is
    also temporarily enabled. On exit from promiscuous mode, the setting
    made by ethtool is restored.

Intuitive or not, there isn't any definition issued by a standards body
which says that promiscuity has anything to do with VLAN filtering - it
only has to do with accepting packets regardless of destination MAC address.

In fact people are already trying to use this misunderstanding/bug of
the enetc driver as a justification to transform promiscuity into
something it never was about: accepting every packet (maybe that would
be the "rx-all" netdev feature?):
https://lore.kernel.org/netdev/20201110153958.ci5ekor3o2ekg3ky@ipetronik.com/

This is relevant because there are use cases in the kernel (such as
tc-flower rules with the protocol 802.1Q and a vlan_id key) which do not
(yet) use the vlan_vid_add API to be compatible with VLAN-filtering NICs
such as enetc, so for those, disabling rx-vlan-filter is currently the
only right solution to make these setups work:
https://lore.kernel.org/netdev/CA+h21hoxwRdhq4y+w8Kwgm74d4cA0xLeiHTrmT-VpSaM7obhkg@mail.gmail.com/
The blamed patch has unintentionally introduced one more way for this to
work, which is to enable IFF_PROMISC, however this is non-portable
because port promiscuity is not meant to disable VLAN filtering.
Therefore, it could invite people to write broken scripts for enetc, and
then wonder why they are broken when migrating to other drivers that
don't handle promiscuity in the same way.

Fixes: 7070eea5e95a ("enetc: permit configuration of rx-vlan-filter with ethtool")
Cc: Markus Blöchl <Markus.Bloechl@ipetronik.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 62ba4bf56f0d..49681a0566ed 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -190,7 +190,6 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 	struct enetc_pf *pf = enetc_si_priv(priv->si);
-	char vlan_promisc_simap = pf->vlan_promisc_simap;
 	struct enetc_hw *hw = &priv->si->hw;
 	bool uprom = false, mprom = false;
 	struct enetc_mac_filter *filter;
@@ -203,16 +202,12 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 		psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0);
 		uprom = true;
 		mprom = true;
-		/* Enable VLAN promiscuous mode for SI0 (PF) */
-		vlan_promisc_simap |= BIT(0);
 	} else if (ndev->flags & IFF_ALLMULTI) {
 		/* enable multi cast promisc mode for SI0 (PF) */
 		psipmr = ENETC_PSIPMR_SET_MP(0);
 		mprom = true;
 	}
 
-	enetc_set_vlan_promisc(&pf->si->hw, vlan_promisc_simap);
-
 	/* first 2 filter entries belong to PF */
 	if (!uprom) {
 		/* Update unicast filters */

From c76a97218dcbb2cb7cec1404ace43ef96c87d874 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:16 +0200
Subject: [PATCH 071/164] net: enetc: force the RGMII speed and duplex instead
 of operating in inband mode

The ENETC port 0 MAC supports in-band status signaling coming from a PHY
when operating in RGMII mode, and this feature is enabled by default.

It has been reported that RGMII is broken in fixed-link, and that is not
surprising considering the fact that no PHY is attached to the MAC in
that case, but a switch.

This brings us to the topic of the patch: the enetc driver should have
not enabled the optional in-band status signaling for RGMII unconditionally,
but should have forced the speed and duplex to what was resolved by
phylink.

Note that phylink does not accept the RGMII modes as valid for in-band
signaling, and these operate a bit differently than 1000base-x and SGMII
(notably there is no clause 37 state machine so no ACK required from the
MAC, instead the PHY sends extra code words on RXD[3:0] whenever it is
not transmitting something else, so it should be safe to leave a PHY
with this option unconditionally enabled even if we ignore it). The spec
talks about this here:
https://e2e.ti.com/cfs-file/__key/communityserver-discussions-components-files/138/RGMIIv1_5F00_3.pdf

Fixes: 71b77a7a27a3 ("enetc: Migrate to PHYLINK and PCS_LYNX")
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/enetc/enetc_hw.h   | 13 +++--
 .../net/ethernet/freescale/enetc/enetc_pf.c   | 53 ++++++++++++++++---
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index a62604a1e54e..de0d20b0f489 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -238,10 +238,17 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM_IMDIO_BASE	0x8030
 
 #define ENETC_PM0_IF_MODE	0x8300
-#define ENETC_PMO_IFM_RG	BIT(2)
+#define ENETC_PM0_IFM_RG	BIT(2)
 #define ENETC_PM0_IFM_RLP	(BIT(5) | BIT(11))
-#define ENETC_PM0_IFM_RGAUTO	(BIT(15) | ENETC_PMO_IFM_RG | BIT(1))
-#define ENETC_PM0_IFM_XGMII	BIT(12)
+#define ENETC_PM0_IFM_EN_AUTO	BIT(15)
+#define ENETC_PM0_IFM_SSP_MASK	GENMASK(14, 13)
+#define ENETC_PM0_IFM_SSP_1000	(2 << 13)
+#define ENETC_PM0_IFM_SSP_100	(0 << 13)
+#define ENETC_PM0_IFM_SSP_10	(1 << 13)
+#define ENETC_PM0_IFM_FULL_DPX	BIT(12)
+#define ENETC_PM0_IFM_IFMODE_MASK GENMASK(1, 0)
+#define ENETC_PM0_IFM_IFMODE_XGMII 0
+#define ENETC_PM0_IFM_IFMODE_GMII 2
 #define ENETC_PSIDCAPR		0x1b08
 #define ENETC_PSIDCAPR_MSK	GENMASK(15, 0)
 #define ENETC_PSFCAPR		0x1b18
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 49681a0566ed..ca02f033bea2 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -315,7 +315,7 @@ static void enetc_set_loopback(struct net_device *ndev, bool en)
 	u32 reg;
 
 	reg = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
-	if (reg & ENETC_PMO_IFM_RG) {
+	if (reg & ENETC_PM0_IFM_RG) {
 		/* RGMII mode */
 		reg = (reg & ~ENETC_PM0_IFM_RLP) |
 		      (en ? ENETC_PM0_IFM_RLP : 0);
@@ -494,13 +494,20 @@ static void enetc_configure_port_mac(struct enetc_hw *hw)
 
 static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode)
 {
-	/* set auto-speed for RGMII */
-	if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG ||
-	    phy_interface_mode_is_rgmii(phy_mode))
-		enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO);
+	u32 val;
 
-	if (phy_mode == PHY_INTERFACE_MODE_USXGMII)
-		enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII);
+	if (phy_interface_mode_is_rgmii(phy_mode)) {
+		val = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
+		val &= ~ENETC_PM0_IFM_EN_AUTO;
+		val &= ENETC_PM0_IFM_IFMODE_MASK;
+		val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG;
+		enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
+	}
+
+	if (phy_mode == PHY_INTERFACE_MODE_USXGMII) {
+		val = ENETC_PM0_IFM_FULL_DPX | ENETC_PM0_IFM_IFMODE_XGMII;
+		enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
+	}
 }
 
 static void enetc_mac_enable(struct enetc_hw *hw, bool en)
@@ -932,6 +939,34 @@ static void enetc_pl_mac_config(struct phylink_config *config,
 		phylink_set_pcs(priv->phylink, &pf->pcs->pcs);
 }
 
+static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
+{
+	u32 old_val, val;
+
+	old_val = val = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
+
+	if (speed == SPEED_1000) {
+		val &= ~ENETC_PM0_IFM_SSP_MASK;
+		val |= ENETC_PM0_IFM_SSP_1000;
+	} else if (speed == SPEED_100) {
+		val &= ~ENETC_PM0_IFM_SSP_MASK;
+		val |= ENETC_PM0_IFM_SSP_100;
+	} else if (speed == SPEED_10) {
+		val &= ~ENETC_PM0_IFM_SSP_MASK;
+		val |= ENETC_PM0_IFM_SSP_10;
+	}
+
+	if (duplex == DUPLEX_FULL)
+		val |= ENETC_PM0_IFM_FULL_DPX;
+	else
+		val &= ~ENETC_PM0_IFM_FULL_DPX;
+
+	if (val == old_val)
+		return;
+
+	enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
+}
+
 static void enetc_pl_mac_link_up(struct phylink_config *config,
 				 struct phy_device *phy, unsigned int mode,
 				 phy_interface_t interface, int speed,
@@ -944,6 +979,10 @@ static void enetc_pl_mac_link_up(struct phylink_config *config,
 	if (priv->active_offloads & ENETC_F_QBV)
 		enetc_sched_speed_set(priv, speed);
 
+	if (!phylink_autoneg_inband(mode) &&
+	    phy_interface_mode_is_rgmii(interface))
+		enetc_force_rgmii_mac(&pf->si->hw, speed, duplex);
+
 	enetc_mac_enable(&pf->si->hw, true);
 }
 

From 96a5223b918c8b79270fc0fec235a7ebad459098 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:17 +0200
Subject: [PATCH 072/164] net: enetc: remove bogus write to SIRXIDR from
 enetc_setup_rxbdr

The Station Interface Receive Interrupt Detect Register (SIRXIDR)
contains a 16-bit wide mask of 'interrupt detected' events for each ring
associated with a port. Bit i is write-1-to-clean for RX ring i.

I have no explanation whatsoever how this line of code came to be
inserted in the blamed commit. I checked the downstream versions of that
patch and none of them have it.

The somewhat comical aspect of it is that we're writing a binary number
to the SIRXIDR register, which is derived from enetc_bd_unused(rx_ring).
Since the RX rings have 512 buffer descriptors, we end up writing 511 to
this register, which is 0x1ff, so we are effectively clearing the
'interrupt detected' event for rings 0-8.

This register is not what is used for interrupt handling though - it
only provides a summary for the entire SI. The hardware provides one
separate Interrupt Detect Register per RX ring, which auto-clears upon
read. So there doesn't seem to be any adverse effect caused by this
bogus write.

There is, however, one reason why this should be handled as a bugfix:
next_to_clean _should_ be committed to hardware, just not to that
register, and this was obscuring the fact that it wasn't. This is fixed
in the next patch, and removing the bogus line now allows the fix patch
to be backported beyond that point.

Fixes: fd5736bf9f23 ("enetc: Workaround for MDIO register access issue")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 8ddf0cdc37a5..abb29ee81463 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1212,7 +1212,6 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
 	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
-	enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use);
 
 	/* enable ring */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);

From 3a5d12c9be6f30080600c8bacaf310194e37d029 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 1 Mar 2021 13:18:18 +0200
Subject: [PATCH 073/164] net: enetc: keep RX ring consumer index in sync with
 hardware

The RX rings have a producer index owned by hardware, where newly
received frame buffers are placed, and a consumer index owned by
software, where newly allocated buffers are placed, in expectation of
hardware being able to place frame data in them.

Hardware increments the producer index when a frame is received, however
it is not allowed to increment the producer index to match the consumer
index (RBCIR) since the ring can hold at most RBLENR[LENGTH]-1 received
BDs. Whenever the producer index matches the value of the consumer
index, the ring has no unprocessed received frames and all BDs in the
ring have been initialized/prepared by software, i.e. hardware owns all
BDs in the ring.

The code uses the next_to_clean variable to keep track of the producer
index, and the next_to_use variable to keep track of the consumer index.

The RX rings are seeded from enetc_refill_rx_ring, which is called from
two places:

1. initially the ring is seeded until full with enetc_bd_unused(rx_ring),
   i.e. with 511 buffers. This will make next_to_clean=0 and next_to_use=511:

.ndo_open
-> enetc_open
   -> enetc_setup_bdrs
      -> enetc_setup_rxbdr
         -> enetc_refill_rx_ring

2. then during the data path processing, it is refilled with 16 buffers
   at a time:

enetc_msix
-> napi_schedule
   -> enetc_poll
      -> enetc_clean_rx_ring
         -> enetc_refill_rx_ring

There is just one problem: the initial seeding done during .ndo_open
updates just the producer index (ENETC_RBPIR) with 0, and the software
next_to_clean and next_to_use variables. Notably, it will not update the
consumer index to make the hardware aware of the newly added buffers.

Wait, what? So how does it work?

Well, the reset values of the producer index and of the consumer index
of a ring are both zero. As per the description in the second paragraph,
it means that the ring is full of buffers waiting for hardware to put
frames in them, which by coincidence is almost true, because we have in
fact seeded 511 buffers into the ring.

But will the hardware attempt to access the 512th entry of the ring,
which has an invalid BD in it? Well, no, because in order to do that, it
would have to first populate the first 511 entries, and the NAPI
enetc_poll will kick in by then. Eventually, after 16 processed slots
have become available in the RX ring, enetc_clean_rx_ring will call
enetc_refill_rx_ring and then will [ finally ] update the consumer index
with the new software next_to_use variable. From now on, the
next_to_clean and next_to_use variables are in sync with the producer
and consumer ring indices.

So the day is saved, right? Well, not quite. Freeing the memory
allocated for the rings is done in:

enetc_close
-> enetc_clear_bdrs
   -> enetc_clear_rxbdr
      -> this just disables the ring
-> enetc_free_rxtx_rings
   -> enetc_free_rx_ring
      -> sets next_to_clean and next_to_use to 0

but again, nothing is committed to the hardware producer and consumer
indices (yay!). The assumption is that the ring is disabled, so the
indices don't matter anyway, and it's the responsibility of the "open"
code path to set those up.

.. Except that the "open" code path does not set those up properly.

While initially, things almost work, during subsequent enetc_close ->
enetc_open sequences, we have problems. To be precise, the enetc_open
that is subsequent to enetc_close will again refill the ring with 511
entries, but it will leave the consumer index untouched. Untouched
means, of course, equal to the value it had before disabling the ring
and draining the old buffers in enetc_close.

But as mentioned, enetc_setup_rxbdr will at least update the producer
index though, through this line of code:

	enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);

so at this stage we'll have:

next_to_clean=0 (in hardware 0)
next_to_use=511 (in hardware we'll have the refill index prior to enetc_close)

Again, the next_to_clean and producer index are in sync and set to
correct values, so the driver manages to limp on. Eventually, 16 ring
entries will be consumed by enetc_poll, and the savior
enetc_clean_rx_ring will come and call enetc_refill_rx_ring, and then
update the hardware consumer ring based upon the new next_to_use.

So.. it works?
Well, by coincidence, it almost does, but there's a circumstance where
enetc_clean_rx_ring won't be there to save us. If the previous value of
the consumer index was 15, there's a problem, because the NAPI poll
sequence will only issue a refill when 16 or more buffers have been
consumed.

It's easiest to illustrate this with an example:

ip link set eno0 up
ip addr add 192.168.100.1/24 dev eno0
ping 192.168.100.1 -c 20 # ping this port from another board
ip link set eno0 down
ip link set eno0 up
ping 192.168.100.1 -c 20 # ping it again from the same other board

One by one:

1. ip link set eno0 up
-> calls enetc_setup_rxbdr:
   -> calls enetc_refill_rx_ring(511 buffers)
   -> next_to_clean=0 (in hw 0)
   -> next_to_use=511 (in hw 0)

2. ping 192.168.100.1 -c 20 # ping this port from another board
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=1 next_to_clean 0 (in hw 1) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=2 next_to_clean 1 (in hw 2) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=3 next_to_clean 2 (in hw 3) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=4 next_to_clean 3 (in hw 4) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=5 next_to_clean 4 (in hw 5) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=6 next_to_clean 5 (in hw 6) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=7 next_to_clean 6 (in hw 7) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=8 next_to_clean 7 (in hw 8) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=9 next_to_clean 8 (in hw 9) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=10 next_to_clean 9 (in hw 10) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=11 next_to_clean 10 (in hw 11) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=12 next_to_clean 11 (in hw 12) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=13 next_to_clean 12 (in hw 13) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=14 next_to_clean 13 (in hw 14) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=15 next_to_clean 14 (in hw 15) next_to_use 511 (in hw 0)
enetc_clean_rx_ring: enetc_refill_rx_ring(16) increments next_to_use by 16 (mod 512) and writes it to hw
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=0 next_to_clean 15 (in hw 16) next_to_use 15 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=1 next_to_clean 16 (in hw 17) next_to_use 15 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=2 next_to_clean 17 (in hw 18) next_to_use 15 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=3 next_to_clean 18 (in hw 19) next_to_use 15 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=4 next_to_clean 19 (in hw 20) next_to_use 15 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=5 next_to_clean 20 (in hw 21) next_to_use 15 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=6 next_to_clean 21 (in hw 22) next_to_use 15 (in hw 15)

20 packets transmitted, 20 packets received, 0% packet loss

3. ip link set eno0 down
enetc_free_rx_ring: next_to_clean 0 (in hw 22), next_to_use 0 (in hw 15)

4. ip link set eno0 up
-> calls enetc_setup_rxbdr:
   -> calls enetc_refill_rx_ring(511 buffers)
   -> next_to_clean=0 (in hw 0)
   -> next_to_use=511 (in hw 15)

5. ping 192.168.100.1 -c 20 # ping it again from the same other board
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=1 next_to_clean 0 (in hw 1) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=2 next_to_clean 1 (in hw 2) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=3 next_to_clean 2 (in hw 3) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=4 next_to_clean 3 (in hw 4) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=5 next_to_clean 4 (in hw 5) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=6 next_to_clean 5 (in hw 6) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=7 next_to_clean 6 (in hw 7) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=8 next_to_clean 7 (in hw 8) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=9 next_to_clean 8 (in hw 9) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=10 next_to_clean 9 (in hw 10) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=11 next_to_clean 10 (in hw 11) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=12 next_to_clean 11 (in hw 12) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=13 next_to_clean 12 (in hw 13) next_to_use 511 (in hw 15)
enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=14 next_to_clean 13 (in hw 14) next_to_use 511 (in hw 15)

20 packets transmitted, 12 packets received, 40% packet loss

And there it dies. No enetc_refill_rx_ring (because cleaned_cnt must be equal
to 15 for that to happen), no nothing. The hardware enters the condition where
the producer (14) + 1 is equal to the consumer (15) index, which makes it
believe it has no more free buffers to put packets in, so it starts discarding
them:

ip netns exec ns0 ethtool -S eno0 | grep -v ': 0'
NIC statistics:
     Rx ring  0 discarded frames: 8

Summarized, if the interface receives between 16 and 32 (mod 512) frames
and then there is a link flap, then the port will eventually die with no
way to recover. If it receives less than 16 (mod 512) frames, then the
initial NAPI poll [ before the link flap ] will not update the consumer
index in hardware (it will remain zero) which will be ok when the buffers
are later reinitialized. If more than 32 (mod 512) frames are received,
the initial NAPI poll has the chance to refill the ring twice, updating
the consumer index to at least 32. So after the link flap, the consumer
index is still wrong, but the post-flap NAPI poll gets a chance to
refill the ring once (because it passes through cleaned_cnt=15) and
makes the consumer index be again back in sync with next_to_use.

The solution to this problem is actually simple, we just need to write
next_to_use into the hardware consumer index at enetc_open time, which
always brings it back in sync after an initial buffer seeding process.

The simpler thing would be to put the write to the consumer index into
enetc_refill_rx_ring directly, but there are issues with the MDIO
locking: in the NAPI poll code we have the enetc_lock_mdio() taken from
top-level and we use the unlocked enetc_wr_reg_hot, whereas in
enetc_open, the enetc_lock_mdio() is not taken at the top level, but
instead by each individual enetc_wr_reg, so we are forced to put an
additional enetc_wr_reg in enetc_setup_rxbdr. Better organization of
the code is left as a refactoring exercise.

Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index abb29ee81463..30d7d4e83900 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1212,6 +1212,8 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
 	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
+	/* update ENETC's consumer index */
+	enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use);
 
 	/* enable ring */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);

From 2353db75c3db1dd26ff9c8feccfd3543a9cb73be Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 1 Mar 2021 21:28:23 +0900
Subject: [PATCH 074/164] docs: networking: bonding.rst Fix a typo in
 bonding.rst

This patch fixes a spelling typo in bonding.rst.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/bonding.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index 5f690f0ad0e4..62f2aab8eaec 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -1988,7 +1988,7 @@ netif_carrier.
 If use_carrier is 0, then the MII monitor will first query the
 device's (via ioctl) MII registers and check the link state.  If that
 request fails (not just that it returns carrier down), then the MII
-monitor will make an ethtool ETHOOL_GLINK request to attempt to obtain
+monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain
 the same information.  If both methods fail (i.e., the driver either
 does not support or had some error in processing both the MII register
 and ethtool requests), then the MII monitor will assume the link is

From b228c9b058760500fda5edb3134527f629fc2dc3 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 1 Mar 2021 15:09:44 +0000
Subject: [PATCH 075/164] net: expand textsearch ts_state to fit skb_seq_state

The referenced commit expands the skb_seq_state used by
skb_find_text with a 4B frag_off field, growing it to 48B.

This exceeds container ts_state->cb, causing a stack corruption:

[   73.238353] Kernel panic - not syncing: stack-protector: Kernel stack
is corrupted in: skb_find_text+0xc5/0xd0
[   73.247384] CPU: 1 PID: 376 Comm: nping Not tainted 5.11.0+ #4
[   73.252613] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.14.0-2 04/01/2014
[   73.260078] Call Trace:
[   73.264677]  dump_stack+0x57/0x6a
[   73.267866]  panic+0xf6/0x2b7
[   73.270578]  ? skb_find_text+0xc5/0xd0
[   73.273964]  __stack_chk_fail+0x10/0x10
[   73.277491]  skb_find_text+0xc5/0xd0
[   73.280727]  string_mt+0x1f/0x30
[   73.283639]  ipt_do_table+0x214/0x410

The struct is passed between skb_find_text and its callbacks
skb_prepare_seq_read, skb_seq_read and skb_abort_seq read through
the textsearch interface using TS_SKB_CB.

I assumed that this mapped to skb->cb like other .._SKB_CB wrappers.
skb->cb is 48B. But it maps to ts_state->cb, which is only 40B.

skb->cb was increased from 40B to 48B after ts_state was introduced,
in commit 3e3850e989c5 ("[NETFILTER]: Fix xfrm lookup in
ip_route_me_harder/ip6_route_me_harder").

Increase ts_state.cb[] to 48 to fit the struct.

Also add a BUILD_BUG_ON to avoid a repeat.

The alternative is to directly add a dependency from textsearch onto
linux/skbuff.h, but I think the intent is textsearch to have no such
dependencies on its callers.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=211911
Fixes: 97550f6fa592 ("net: compound page support in skb_seq_read")
Reported-by: Kris Karas <bugs-a17@moonlit-rail.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/textsearch.h | 2 +-
 net/core/skbuff.c          | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h
index 13770cfe33ad..6673e4d4ac2e 100644
--- a/include/linux/textsearch.h
+++ b/include/linux/textsearch.h
@@ -23,7 +23,7 @@ struct ts_config;
 struct ts_state
 {
 	unsigned int		offset;
-	char			cb[40];
+	char			cb[48];
 };
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 545a472273a5..c421c8f80925 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3659,6 +3659,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
 	struct ts_state state;
 	unsigned int ret;
 
+	BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb));
+
 	config->get_next_block = skb_ts_get_next_block;
 	config->finish = skb_ts_finish;
 

From 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Tue, 2 Mar 2021 00:01:59 +0800
Subject: [PATCH 076/164] net: dsa: tag_mtk: fix 802.1ad VLAN egress

A different TPID bit is used for 802.1ad VLAN frames.

Reported-by: Ilario Gelmetti <iochesonome@gmail.com>
Fixes: f0af34317f4b ("net: dsa: mediatek: combine MediaTek tag with VLAN tag")
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_mtk.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 38dcdded74c0..59748487664f 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,6 +13,7 @@
 #define MTK_HDR_LEN		4
 #define MTK_HDR_XMIT_UNTAGGED		0
 #define MTK_HDR_XMIT_TAGGED_TPID_8100	1
+#define MTK_HDR_XMIT_TAGGED_TPID_88A8	2
 #define MTK_HDR_RECV_SOURCE_PORT_MASK	GENMASK(2, 0)
 #define MTK_HDR_XMIT_DP_BIT_MASK	GENMASK(5, 0)
 #define MTK_HDR_XMIT_SA_DIS		BIT(6)
@@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 				    struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
+	u8 xmit_tpid;
 	u8 *mtk_tag;
-	bool is_vlan_skb = true;
 	unsigned char *dest = eth_hdr(skb)->h_dest;
 	bool is_multicast_skb = is_multicast_ether_addr(dest) &&
 				!is_broadcast_ether_addr(dest);
@@ -33,10 +34,17 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	 * the both special and VLAN tag at the same time and then look up VLAN
 	 * table with VID.
 	 */
-	if (!skb_vlan_tagged(skb)) {
+	switch (skb->protocol) {
+	case htons(ETH_P_8021Q):
+		xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
+		break;
+	case htons(ETH_P_8021AD):
+		xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
+		break;
+	default:
+		xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
 		skb_push(skb, MTK_HDR_LEN);
 		memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
-		is_vlan_skb = false;
 	}
 
 	mtk_tag = skb->data + 2 * ETH_ALEN;
@@ -44,8 +52,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	/* Mark tag attribute on special tag insertion to notify hardware
 	 * whether that's a combined special tag with 802.1Q header.
 	 */
-	mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
-		     MTK_HDR_XMIT_UNTAGGED;
+	mtk_tag[0] = xmit_tpid;
 	mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
 
 	/* Disable SA learning for multicast frames */
@@ -53,7 +60,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 		mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
 
 	/* Tag control information is kept for 802.1Q */
-	if (!is_vlan_skb) {
+	if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
 		mtk_tag[2] = 0;
 		mtk_tag[3] = 0;
 	}

From 3946688edbc5b629110c339b3babf10aa9e7adad Mon Sep 17 00:00:00 2001
From: "Andrea Parri (Microsoft)" <parri.andrea@gmail.com>
Date: Mon, 1 Mar 2021 19:25:30 +0100
Subject: [PATCH 077/164] hv_netvsc: Fix validation in
 netvsc_linkstatus_callback()

Contrary to the RNDIS protocol specification, certain (pre-Fe)
implementations of Hyper-V's vSwitch did not account for the status
buffer field in the length of an RNDIS packet; the bug was fixed in
newer implementations.  Validate the status buffer fields using the
length of the 'vmtransfer_page' packet (all implementations), that
is known/validated to be less than or equal to the receive section
size and not smaller than the length of the RNDIS message.

Reported-by: Dexuan Cui <decui@microsoft.com>
Suggested-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Fixes: 505e3f00c3f36 ("hv_netvsc: Add (more) validation for untrusted Hyper-V values")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h   |  2 +-
 drivers/net/hyperv/netvsc_drv.c   | 13 +++++++++----
 drivers/net/hyperv/rndis_filter.c |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index e1a497d3c9ba..59ac04a610ad 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -229,7 +229,7 @@ int netvsc_send(struct net_device *net,
 		bool xdp_tx);
 void netvsc_linkstatus_callback(struct net_device *net,
 				struct rndis_message *resp,
-				void *data);
+				void *data, u32 data_buflen);
 int netvsc_recv_callback(struct net_device *net,
 			 struct netvsc_device *nvdev,
 			 struct netvsc_channel *nvchan);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 8176fa0c8b16..15f262b70489 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -744,7 +744,7 @@ static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
  */
 void netvsc_linkstatus_callback(struct net_device *net,
 				struct rndis_message *resp,
-				void *data)
+				void *data, u32 data_buflen)
 {
 	struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
 	struct net_device_context *ndev_ctx = netdev_priv(net);
@@ -765,11 +765,16 @@ void netvsc_linkstatus_callback(struct net_device *net,
 	if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
 		u32 speed;
 
-		/* Validate status_buf_offset */
+		/* Validate status_buf_offset and status_buflen.
+		 *
+		 * Certain (pre-Fe) implementations of Hyper-V's vSwitch didn't account
+		 * for the status buffer field in resp->msg_len; perform the validation
+		 * using data_buflen (>= resp->msg_len).
+		 */
 		if (indicate->status_buflen < sizeof(speed) ||
 		    indicate->status_buf_offset < sizeof(*indicate) ||
-		    resp->msg_len - RNDIS_HEADER_SIZE < indicate->status_buf_offset ||
-		    resp->msg_len - RNDIS_HEADER_SIZE - indicate->status_buf_offset
+		    data_buflen - RNDIS_HEADER_SIZE < indicate->status_buf_offset ||
+		    data_buflen - RNDIS_HEADER_SIZE - indicate->status_buf_offset
 				< indicate->status_buflen) {
 			netdev_err(net, "invalid rndis_indicate_status packet\n");
 			return;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 123cc9d25f5e..c0e89e107d57 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -620,7 +620,7 @@ int rndis_filter_receive(struct net_device *ndev,
 
 	case RNDIS_MSG_INDICATE:
 		/* notification msgs */
-		netvsc_linkstatus_callback(ndev, rndis_msg, data);
+		netvsc_linkstatus_callback(ndev, rndis_msg, data, buflen);
 		break;
 	default:
 		netdev_err(ndev,

From 8811f4a9836e31c14ecdf79d9f3cb7c5d463265d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 1 Mar 2021 10:29:17 -0800
Subject: [PATCH 078/164] tcp: add sanity tests to TCP_QUEUE_SEQ

Qingyu Li reported a syzkaller bug where the repro
changes RCV SEQ _after_ restoring data in the receive queue.

mprotect(0x4aa000, 12288, PROT_READ)    = 0
mmap(0x1ffff000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x1ffff000
mmap(0x20000000, 16777216, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000
mmap(0x21000000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x21000000
socket(AF_INET6, SOCK_STREAM, IPPROTO_IP) = 3
setsockopt(3, SOL_TCP, TCP_REPAIR, [1], 4) = 0
connect(3, {sa_family=AF_INET6, sin6_port=htons(0), sin6_flowinfo=htonl(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_scope_id=0}, 28) = 0
setsockopt(3, SOL_TCP, TCP_REPAIR_QUEUE, [1], 4) = 0
sendmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="0x0000000000000003\0\0", iov_len=20}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
setsockopt(3, SOL_TCP, TCP_REPAIR, [0], 4) = 0
setsockopt(3, SOL_TCP, TCP_QUEUE_SEQ, [128], 4) = 0
recvfrom(3, NULL, 20, 0, NULL, NULL)    = -1 ECONNRESET (Connection reset by peer)

syslog shows:
[  111.205099] TCP recvmsg seq # bug 2: copied 80, seq 0, rcvnxt 80, fl 0
[  111.207894] WARNING: CPU: 1 PID: 356 at net/ipv4/tcp.c:2343 tcp_recvmsg_locked+0x90e/0x29a0

This should not be allowed. TCP_QUEUE_SEQ should only be used
when queues are empty.

This patch fixes this case, and the tx path as well.

Fixes: ee9952831cfd ("tcp: Initial repair mode")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=212005
Reported-by: Qingyu Li <ieatmuttonchuan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dfb6f286c1de..de7cc8445ac0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3469,16 +3469,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case TCP_QUEUE_SEQ:
-		if (sk->sk_state != TCP_CLOSE)
+		if (sk->sk_state != TCP_CLOSE) {
 			err = -EPERM;
-		else if (tp->repair_queue == TCP_SEND_QUEUE)
-			WRITE_ONCE(tp->write_seq, val);
-		else if (tp->repair_queue == TCP_RECV_QUEUE) {
-			WRITE_ONCE(tp->rcv_nxt, val);
-			WRITE_ONCE(tp->copied_seq, val);
-		}
-		else
+		} else if (tp->repair_queue == TCP_SEND_QUEUE) {
+			if (!tcp_rtx_queue_empty(sk))
+				err = -EPERM;
+			else
+				WRITE_ONCE(tp->write_seq, val);
+		} else if (tp->repair_queue == TCP_RECV_QUEUE) {
+			if (tp->rcv_nxt != tp->copied_seq) {
+				err = -EPERM;
+			} else {
+				WRITE_ONCE(tp->rcv_nxt, val);
+				WRITE_ONCE(tp->copied_seq, val);
+			}
+		} else {
 			err = -EINVAL;
+		}
 		break;
 
 	case TCP_REPAIR_OPTIONS:

From 42a382a466a967dc053c73b969cd2ac2fec502cf Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 27 Feb 2021 06:17:26 +0100
Subject: [PATCH 079/164] selftests/bpf: Use the last page in test_snprintf_btf
 on s390

test_snprintf_btf fails on s390, because NULL points to a readable
struct lowcore there. Fix by using the last page instead.

Error message example:

    printing fffffffffffff000 should generate error, got (361)

Fixes: 076a95f5aff2 ("selftests/bpf: Add bpf_snprintf_btf helper tests")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210227051726.121256-1-iii@linux.ibm.com
---
 .../testing/selftests/bpf/progs/netif_receive_skb.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
index 6b670039ea67..1d8918dfbd3f 100644
--- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -16,6 +16,13 @@ bool skip = false;
 #define STRSIZE			2048
 #define EXPECTED_STRSIZE	256
 
+#if defined(bpf_target_s390)
+/* NULL points to a readable struct lowcore on s390, so take the last page */
+#define BADPTR			((void *)0xFFFFFFFFFFFFF000ULL)
+#else
+#define BADPTR			0
+#endif
+
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x)	(sizeof(x) / sizeof((x)[0]))
 #endif
@@ -113,11 +120,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
 	}
 
 	/* Check invalid ptr value */
-	p.ptr = 0;
+	p.ptr = BADPTR;
 	__ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
 	if (__ret >= 0) {
-		bpf_printk("printing NULL should generate error, got (%d)",
-			   __ret);
+		bpf_printk("printing %llx should generate error, got (%d)",
+			   (unsigned long long)BADPTR, __ret);
 		ret = -ERANGE;
 	}
 

From 6185266c5a853bb0f2a459e3ff594546f277609b Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Sun, 28 Feb 2021 12:30:17 +0200
Subject: [PATCH 080/164] selftests/bpf: Mask bpf_csum_diff() return value to
 16 bits in test_verifier

The verifier test labelled "valid read map access into a read-only array
2" calls the bpf_csum_diff() helper and checks its return value. However,
architecture implementations of csum_partial() (which is what the helper
uses) differ in whether they fold the return value to 16 bit or not. For
example, x86 version has ...

	if (unlikely(odd)) {
		result = from32to16(result);
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
	}

... while generic lib/checksum.c does:

	result = from32to16(result);
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);

This makes the helper return different values on different architectures,
breaking the test on non-x86. To fix this, add an additional instruction
to always mask the return value to 16 bits, and update the expected return
value accordingly.

Fixes: fb2abb73e575 ("bpf, selftest: test {rd, wr}only flags and direct value access")
Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210228103017.320240-1-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/verifier/array_access.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index bed53b561e04..1b138cd2b187 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -250,12 +250,13 @@
 	BPF_MOV64_IMM(BPF_REG_5, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 		     BPF_FUNC_csum_diff),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_array_ro = { 3 },
 	.result = ACCEPT,
-	.retval = -29,
+	.retval = 65507,
 },
 {
 	"invalid write map access into a read-only array 1",

From 9cc0001a18b4e5f46ec481201c88ae16f0a69bb0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 27 Feb 2021 22:31:27 +0100
Subject: [PATCH 081/164] netfilter: nftables: disallow updates on table
 ownership

Disallow updating the ownership bit on an existing table: Do not allow
to grab ownership on an existing table. Do not allow to drop ownership
on an existing table.

Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c1eb5cdb3033..b07703e19108 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -916,6 +916,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 	if (flags == ctx->table->flags)
 		return 0;
 
+	if ((nft_table_has_owner(ctx->table) &&
+	     !(flags & NFT_TABLE_F_OWNER)) ||
+	    (!nft_table_has_owner(ctx->table) &&
+	     flags & NFT_TABLE_F_OWNER))
+		return -EOPNOTSUPP;
+
 	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
 				sizeof(struct nft_trans_table));
 	if (trans == NULL)

From fa706dce2f2d7012654e2eab40da2b526c1424b3 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@intel.com>
Date: Tue, 2 Mar 2021 16:57:21 +0800
Subject: [PATCH 082/164] stmmac: intel: Fix mdio bus registration issue for
 TGL-H/ADL-S

On Intel platforms which consist of two Ethernet Controllers such as
TGL-H and ADL-S, a unique MDIO bus id is required for MDIO bus to be
successful registered:

[   13.076133] sysfs: cannot create duplicate filename '/class/mdio_bus/stmmac-1'
[   13.083404] CPU: 8 PID: 1898 Comm: systemd-udevd Tainted: G     U            5.11.0-net-next #106
[   13.092410] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-S ADP-S DRR4 CRB, BIOS ADLIFSI1.R00.1494.B00.2012031421 12/03/2020
[   13.105709] Call Trace:
[   13.108176]  dump_stack+0x64/0x7c
[   13.111553]  sysfs_warn_dup+0x56/0x70
[   13.115273]  sysfs_do_create_link_sd.isra.2+0xbd/0xd0
[   13.120371]  device_add+0x4df/0x840
[   13.123917]  ? complete_all+0x2a/0x40
[   13.127636]  __mdiobus_register+0x98/0x310 [libphy]
[   13.132572]  stmmac_mdio_register+0x1c5/0x3f0 [stmmac]
[   13.137771]  ? stmmac_napi_add+0xa5/0xf0 [stmmac]
[   13.142493]  stmmac_dvr_probe+0x806/0xee0 [stmmac]
[   13.147341]  intel_eth_pci_probe+0x1cb/0x250 [dwmac_intel]
[   13.152884]  pci_device_probe+0xd2/0x150
[   13.156897]  really_probe+0xf7/0x4d0
[   13.160527]  driver_probe_device+0x5d/0x140
[   13.164761]  device_driver_attach+0x4f/0x60
[   13.168996]  __driver_attach+0xa2/0x140
[   13.172891]  ? device_driver_attach+0x60/0x60
[   13.177300]  bus_for_each_dev+0x76/0xc0
[   13.181188]  bus_add_driver+0x189/0x230
[   13.185083]  ? 0xffffffffc0795000
[   13.188446]  driver_register+0x5b/0xf0
[   13.192249]  ? 0xffffffffc0795000
[   13.195577]  do_one_initcall+0x4d/0x210
[   13.199467]  ? kmem_cache_alloc_trace+0x2ff/0x490
[   13.204228]  do_init_module+0x5b/0x21c
[   13.208031]  load_module+0x2a0c/0x2de0
[   13.211838]  ? __do_sys_finit_module+0xb1/0x110
[   13.216420]  __do_sys_finit_module+0xb1/0x110
[   13.220825]  do_syscall_64+0x33/0x40
[   13.224451]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   13.229515] RIP: 0033:0x7fc2b1919ccd
[   13.233113] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 93 31 0c 00 f7 d8 64 89 01 48
[   13.251912] RSP: 002b:00007ffcea2e5b98 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[   13.259527] RAX: ffffffffffffffda RBX: 0000560558920f10 RCX: 00007fc2b1919ccd
[   13.266706] RDX: 0000000000000000 RSI: 00007fc2b1a881e3 RDI: 0000000000000012
[   13.273887] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000
[   13.281036] R10: 0000000000000012 R11: 0000000000000246 R12: 00007fc2b1a881e3
[   13.288183] R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffcea2e5d58
[   13.295389] libphy: mii_bus stmmac-1 failed to register

Fixes: 88af9bd4efbd ("stmmac: intel: Add ADL-S 1Gbps PCI IDs")
Fixes: 8450e23f142f ("stmmac: intel: Add PCI IDs for TGL-H platform")
Signed-off-by: Wong Vee Khee <vee.khee.wong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 54 ++++++++++++++-----
 1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 751dfdeec41c..f2896872a86c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -446,8 +446,8 @@ static int tgl_common_data(struct pci_dev *pdev,
 	return intel_mgbe_common_data(pdev, plat);
 }
 
-static int tgl_sgmii_data(struct pci_dev *pdev,
-			  struct plat_stmmacenet_data *plat)
+static int tgl_sgmii_phy0_data(struct pci_dev *pdev,
+			       struct plat_stmmacenet_data *plat)
 {
 	plat->bus_id = 1;
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
@@ -456,12 +456,26 @@ static int tgl_sgmii_data(struct pci_dev *pdev,
 	return tgl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info tgl_sgmii1g_info = {
-	.setup = tgl_sgmii_data,
+static struct stmmac_pci_info tgl_sgmii1g_phy0_info = {
+	.setup = tgl_sgmii_phy0_data,
 };
 
-static int adls_sgmii_data(struct pci_dev *pdev,
-			   struct plat_stmmacenet_data *plat)
+static int tgl_sgmii_phy1_data(struct pci_dev *pdev,
+			       struct plat_stmmacenet_data *plat)
+{
+	plat->bus_id = 2;
+	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+	plat->serdes_powerup = intel_serdes_powerup;
+	plat->serdes_powerdown = intel_serdes_powerdown;
+	return tgl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info tgl_sgmii1g_phy1_info = {
+	.setup = tgl_sgmii_phy1_data,
+};
+
+static int adls_sgmii_phy0_data(struct pci_dev *pdev,
+				struct plat_stmmacenet_data *plat)
 {
 	plat->bus_id = 1;
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
@@ -471,10 +485,24 @@ static int adls_sgmii_data(struct pci_dev *pdev,
 	return tgl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info adls_sgmii1g_info = {
-	.setup = adls_sgmii_data,
+static struct stmmac_pci_info adls_sgmii1g_phy0_info = {
+	.setup = adls_sgmii_phy0_data,
 };
 
+static int adls_sgmii_phy1_data(struct pci_dev *pdev,
+				struct plat_stmmacenet_data *plat)
+{
+	plat->bus_id = 2;
+	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+
+	/* SerDes power up and power down are done in BIOS for ADL */
+
+	return tgl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info adls_sgmii1g_phy1_info = {
+	.setup = adls_sgmii_phy1_data,
+};
 static const struct stmmac_pci_func_data galileo_stmmac_func_data[] = {
 	{
 		.func = 6,
@@ -756,11 +784,11 @@ static const struct pci_device_id intel_eth_pci_id_table[] = {
 	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID, &ehl_pse1_rgmii1g_info) },
 	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) },
 	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_phy0_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_phy0_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_phy1_info) },
+	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_phy0_info) },
+	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_phy1_info) },
 	{}
 };
 MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table);

From 95b39f07a17faef3a9b225248ba449b976e529c8 Mon Sep 17 00:00:00 2001
From: Biao Huang <biao.huang@mediatek.com>
Date: Tue, 2 Mar 2021 11:33:23 +0800
Subject: [PATCH 083/164] net: ethernet: mtk-star-emac: fix wrong unmap in RX
 handling

mtk_star_dma_unmap_rx() should unmap the dma_addr of old skb rather than
that of new skb.
Assign new_dma_addr to desc_data.dma_addr after all handling of old skb
ends to avoid unexpected receive side error.

Fixes: f96e9641e92b ("net: ethernet: mtk-star-emac: fix error path in RX handling")
Signed-off-by: Biao Huang <biao.huang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index a8641a407c06..96d2891f1675 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1225,8 +1225,6 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv)
 		goto push_new_skb;
 	}
 
-	desc_data.dma_addr = new_dma_addr;
-
 	/* We can't fail anymore at this point: it's safe to unmap the skb. */
 	mtk_star_dma_unmap_rx(priv, &desc_data);
 
@@ -1236,6 +1234,9 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv)
 	desc_data.skb->dev = ndev;
 	netif_receive_skb(desc_data.skb);
 
+	/* update dma_addr for new skb */
+	desc_data.dma_addr = new_dma_addr;
+
 push_new_skb:
 	desc_data.len = skb_tailroom(new_skb);
 	desc_data.skb = new_skb;

From a22549f12767fce49c74c53a853595f82b727935 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 23 Feb 2021 14:00:39 +0000
Subject: [PATCH 084/164] iwlwifi: mvm: add terminate entry for dmi_system_id
 tables

Make sure dmi_system_id tables are NULL terminated. This crashed when LTO was enabled:

BUG: KASAN: global-out-of-bounds in dmi_check_system+0x5a/0x70
Read of size 1 at addr ffffffffc16af750 by task NetworkManager/1913

CPU: 4 PID: 1913 Comm: NetworkManager Not tainted 5.12.0-rc1+ #10057
Hardware name: LENOVO 20THCTO1WW/20THCTO1WW, BIOS N2VET27W (1.12 ) 12/21/2020
Call Trace:
 dump_stack+0x90/0xbe
 print_address_description.constprop.0+0x1d/0x140
 ? dmi_check_system+0x5a/0x70
 ? dmi_check_system+0x5a/0x70
 kasan_report.cold+0x7b/0xd4
 ? dmi_check_system+0x5a/0x70
 __asan_load1+0x4d/0x50
 dmi_check_system+0x5a/0x70
 iwl_mvm_up+0x1360/0x1690 [iwlmvm]
 ? iwl_mvm_send_recovery_cmd+0x270/0x270 [iwlmvm]
 ? setup_object.isra.0+0x27/0xd0
 ? kasan_poison+0x20/0x50
 ? ___slab_alloc.constprop.0+0x483/0x5b0
 ? mempool_kmalloc+0x17/0x20
 ? ftrace_graph_ret_addr+0x2a/0xb0
 ? kasan_poison+0x3c/0x50
 ? cfg80211_iftype_allowed+0x2e/0x90 [cfg80211]
 ? __kasan_check_write+0x14/0x20
 ? mutex_lock+0x86/0xe0
 ? __mutex_lock_slowpath+0x20/0x20
 __iwl_mvm_mac_start+0x49/0x290 [iwlmvm]
 iwl_mvm_mac_start+0x37/0x50 [iwlmvm]
 drv_start+0x73/0x1b0 [mac80211]
 ieee80211_do_open+0x53e/0xf10 [mac80211]
 ? ieee80211_check_concurrent_iface+0x266/0x2e0 [mac80211]
 ieee80211_open+0xb9/0x100 [mac80211]
 __dev_open+0x1b8/0x280

Fixes: a2ac0f48a07c ("iwlwifi: mvm: implement approved list for the PPAG feature")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Victor Michel <vic.michel.web@gmail.com>
Acked-by: Luca Coelho <luciano.coelho@intel.com>
[kvalo@codeaurora.org: improve commit log]
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210223140039.1708534-1-weiyongjun1@huawei.com
---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 15e2773ce7e7..5ee64f7f3c85 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1083,6 +1083,7 @@ static const struct dmi_system_id dmi_ppag_approved_list[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek COMPUTER INC."),
 		},
 	},
+	{}
 };
 
 static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)

From 436b265671d653787eed9bc716f44882d2a458cb Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 1 Mar 2021 19:16:37 -0600
Subject: [PATCH 085/164] iwlwifi: fix ARCH=i386 compilation warnings

An unsigned long variable should rely on '%lu' format strings, not '%zd'

Fixes: a1a6a4cf49ece ("iwlwifi: pnvm: implement reading PNVM from UEFI")
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210302011640.1276636-1-pierre-louis.bossart@linux.intel.com
---
 drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
index fd070ca5e517..40f2109a097f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
@@ -271,12 +271,12 @@ static int iwl_pnvm_get_from_efi(struct iwl_trans *trans,
 	err = efivar_entry_get(pnvm_efivar, NULL, &package_size, package);
 	if (err) {
 		IWL_DEBUG_FW(trans,
-			     "PNVM UEFI variable not found %d (len %zd)\n",
+			     "PNVM UEFI variable not found %d (len %lu)\n",
 			     err, package_size);
 		goto out;
 	}
 
-	IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %zd\n", package_size);
+	IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %lu\n", package_size);
 
 	*data = kmemdup(package->data, *len, GFP_KERNEL);
 	if (!*data)

From 295d4cd82b0181dd36b145fd535c13d623d7a335 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 2 Mar 2021 11:34:51 +0100
Subject: [PATCH 086/164] iwlwifi: don't call netif_napi_add() with rxq->lock
 held (was Re: Lockdep warning in iwl_pcie_rx_handle())

We can't call netif_napi_add() with rxq-lock held, as there is a potential
for deadlock as spotted by lockdep (see below). rxq->lock is not
protecting anything over the netif_napi_add() codepath anyway, so let's
drop it just before calling into NAPI.

 ========================================================
 WARNING: possible irq lock inversion dependency detected
 5.12.0-rc1-00002-gbada49429032 #5 Not tainted
 --------------------------------------------------------
 irq/136-iwlwifi/565 just changed the state of lock:
 ffff89f28433b0b0 (&rxq->lock){+.-.}-{2:2}, at: iwl_pcie_rx_handle+0x7f/0x960 [iwlwifi]
 but this lock took another, SOFTIRQ-unsafe lock in the past:
  (napi_hash_lock){+.+.}-{2:2}

 and interrupts could create inverse lock ordering between them.

 other info that might help us debug this:
  Possible interrupt unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(napi_hash_lock);
                                local_irq_disable();
                                lock(&rxq->lock);
                                lock(napi_hash_lock);
   <Interrupt>
     lock(&rxq->lock);

  *** DEADLOCK ***

 1 lock held by irq/136-iwlwifi/565:
  #0: ffff89f2b1440170 (sync_cmd_lockdep_map){+.+.}-{0:0}, at: iwl_pcie_irq_handler+0x5/0xb30

 the shortest dependencies between 2nd lock and 1st lock:
  -> (napi_hash_lock){+.+.}-{2:2} {
     HARDIRQ-ON-W at:
                       lock_acquire+0x277/0x3d0
                       _raw_spin_lock+0x2c/0x40
                       netif_napi_add+0x14b/0x270
                       e1000_probe+0x2fe/0xee0 [e1000e]
                       local_pci_probe+0x42/0x90
                       pci_device_probe+0x10b/0x1c0
                       really_probe+0xef/0x4b0
                       driver_probe_device+0xde/0x150
                       device_driver_attach+0x4f/0x60
                       __driver_attach+0x9c/0x140
                       bus_for_each_dev+0x79/0xc0
                       bus_add_driver+0x18d/0x220
                       driver_register+0x5b/0xf0
                       do_one_initcall+0x5b/0x300
                       do_init_module+0x5b/0x21c
                       load_module+0x1dae/0x22c0
                       __do_sys_finit_module+0xad/0x110
                       do_syscall_64+0x33/0x80
                       entry_SYSCALL_64_after_hwframe+0x44/0xae
     SOFTIRQ-ON-W at:
                       lock_acquire+0x277/0x3d0
                       _raw_spin_lock+0x2c/0x40
                       netif_napi_add+0x14b/0x270
                       e1000_probe+0x2fe/0xee0 [e1000e]
                       local_pci_probe+0x42/0x90
                       pci_device_probe+0x10b/0x1c0
                       really_probe+0xef/0x4b0
                       driver_probe_device+0xde/0x150
                       device_driver_attach+0x4f/0x60
                       __driver_attach+0x9c/0x140
                       bus_for_each_dev+0x79/0xc0
                       bus_add_driver+0x18d/0x220
                       driver_register+0x5b/0xf0
                       do_one_initcall+0x5b/0x300
                       do_init_module+0x5b/0x21c
                       load_module+0x1dae/0x22c0
                       __do_sys_finit_module+0xad/0x110
                       do_syscall_64+0x33/0x80
                       entry_SYSCALL_64_after_hwframe+0x44/0xae
     INITIAL USE at:
                      lock_acquire+0x277/0x3d0
                      _raw_spin_lock+0x2c/0x40
                      netif_napi_add+0x14b/0x270
                      e1000_probe+0x2fe/0xee0 [e1000e]
                      local_pci_probe+0x42/0x90
                      pci_device_probe+0x10b/0x1c0
                      really_probe+0xef/0x4b0
                      driver_probe_device+0xde/0x150
                      device_driver_attach+0x4f/0x60
                      __driver_attach+0x9c/0x140
                      bus_for_each_dev+0x79/0xc0
                      bus_add_driver+0x18d/0x220
                      driver_register+0x5b/0xf0
                      do_one_initcall+0x5b/0x300
                      do_init_module+0x5b/0x21c
                      load_module+0x1dae/0x22c0
                      __do_sys_finit_module+0xad/0x110
                      do_syscall_64+0x33/0x80
                      entry_SYSCALL_64_after_hwframe+0x44/0xae
   }
   ... key      at: [<ffffffffae84ef38>] napi_hash_lock+0x18/0x40
   ... acquired at:
    _raw_spin_lock+0x2c/0x40
    netif_napi_add+0x14b/0x270
    _iwl_pcie_rx_init+0x1f4/0x710 [iwlwifi]
    iwl_pcie_rx_init+0x1b/0x3b0 [iwlwifi]
    iwl_trans_pcie_start_fw+0x2ac/0x6a0 [iwlwifi]
    iwl_mvm_load_ucode_wait_alive+0x116/0x460 [iwlmvm]
    iwl_run_init_mvm_ucode+0xa4/0x3a0 [iwlmvm]
    iwl_op_mode_mvm_start+0x9ed/0xbf0 [iwlmvm]
    _iwl_op_mode_start.isra.4+0x42/0x80 [iwlwifi]
    iwl_opmode_register+0x71/0xe0 [iwlwifi]
    iwl_mvm_init+0x34/0x1000 [iwlmvm]
    do_one_initcall+0x5b/0x300
    do_init_module+0x5b/0x21c
    load_module+0x1dae/0x22c0
    __do_sys_finit_module+0xad/0x110
    do_syscall_64+0x33/0x80
    entry_SYSCALL_64_after_hwframe+0x44/0xae

[ ... lockdep output trimmed .... ]

Fixes: 25edc8f259c7106 ("iwlwifi: pcie: properly implement NAPI")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2103021134060.12405@cbobk.fhfr.pm
---
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 42426e25cac6..2bec97133119 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1129,6 +1129,8 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
 
 		iwl_pcie_rx_init_rxb_lists(rxq);
 
+		spin_unlock_bh(&rxq->lock);
+
 		if (!rxq->napi.poll) {
 			int (*poll)(struct napi_struct *, int) = iwl_pcie_napi_poll;
 
@@ -1149,7 +1151,6 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
 			napi_enable(&rxq->napi);
 		}
 
-		spin_unlock_bh(&rxq->lock);
 	}
 
 	/* move the pool to the default queue and allocator ownerships */

From 2378b2c9ecf437b918dff246b81b5b624ec14f80 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 2 Mar 2021 14:21:54 +0300
Subject: [PATCH 087/164] octeontx2-af: cn10k: fix an array overflow in
 is_lmac_valid()

The value of "lmac_id" can be controlled by the user and if it is larger
then the number of bits in long then it reads outside the bitmap.
The highest valid value is less than MAX_LMAC_PER_CGX (4).

Fixes: 91c6945ea1f9 ("octeontx2-af: cn10k: Add RPM MAC support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 9caa375d01b1..68deae529bc9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -56,7 +56,9 @@ static bool is_dev_rpm(void *cgxd)
 
 bool is_lmac_valid(struct cgx *cgx, int lmac_id)
 {
-	return cgx && test_bit(lmac_id, &cgx->lmac_bmap);
+	if (!cgx || lmac_id < 0 || lmac_id >= MAX_LMAC_PER_CGX)
+		return false;
+	return test_bit(lmac_id, &cgx->lmac_bmap);
 }
 
 struct mac_ops *get_mac_ops(void *cgxd)

From 6881b07fdd24850def1f03761c66042b983ff86e Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 2 Mar 2021 20:47:47 +0100
Subject: [PATCH 088/164] ibmvnic: Fix possibly uninitialized old_num_tx_queues
 variable warning.

GCC 7.5 reports:
../drivers/net/ethernet/ibm/ibmvnic.c: In function 'ibmvnic_reset_init':
../drivers/net/ethernet/ibm/ibmvnic.c:5373:51: warning: 'old_num_tx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized]
../drivers/net/ethernet/ibm/ibmvnic.c:5373:6: warning: 'old_num_rx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized]

The variable is initialized only if(reset) and used only if(reset &&
something) so this is a false positive. However, there is no reason to
not initialize the variables unconditionally avoiding the warning.

Fixes: 635e442f4a48 ("ibmvnic: merge ibmvnic_reset_init and ibmvnic_init")
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Reviewed-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 118a4bd3f877..3bad762083c5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -5219,16 +5219,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
 {
 	struct device *dev = &adapter->vdev->dev;
 	unsigned long timeout = msecs_to_jiffies(20000);
-	u64 old_num_rx_queues, old_num_tx_queues;
+	u64 old_num_rx_queues = adapter->req_rx_queues;
+	u64 old_num_tx_queues = adapter->req_tx_queues;
 	int rc;
 
 	adapter->from_passive_init = false;
 
-	if (reset) {
-		old_num_rx_queues = adapter->req_rx_queues;
-		old_num_tx_queues = adapter->req_tx_queues;
+	if (reset)
 		reinit_completion(&adapter->init_done);
-	}
 
 	adapter->init_done_rc = 0;
 	rc = ibmvnic_send_crq_init(adapter);

From 879c348c35bb5fb758dd881d8a97409c1862dae8 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Wed, 3 Mar 2021 20:38:40 +0530
Subject: [PATCH 089/164] net: stmmac: fix incorrect DMA channel intr enable
 setting of EQoS v4.10

We introduce dwmac410_dma_init_channel() here for both EQoS v4.10 and
above which use different DMA_CH(n)_Interrupt_Enable bit definitions for
NIE and AIE.

Fixes: 48863ce5940f ("stmmac: add DMA support for GMAC 4.xx")
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Ramesh Babu B <ramesh.babu.b@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac4_dma.c  | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index bb29bfcd62c3..62aa0e95beb7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -124,6 +124,23 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr,
 	       ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
+static void dwmac410_dma_init_channel(void __iomem *ioaddr,
+				      struct stmmac_dma_cfg *dma_cfg, u32 chan)
+{
+	u32 value;
+
+	/* common channel control register config */
+	value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+	if (dma_cfg->pblx8)
+		value = value | DMA_BUS_MODE_PBL;
+
+	writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
+
+	/* Mask interrupts by writing to CSR7 */
+	writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10,
+	       ioaddr + DMA_CHAN_INTR_ENA(chan));
+}
+
 static void dwmac4_dma_init(void __iomem *ioaddr,
 			    struct stmmac_dma_cfg *dma_cfg, int atds)
 {
@@ -523,7 +540,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
 const struct stmmac_dma_ops dwmac410_dma_ops = {
 	.reset = dwmac4_dma_reset,
 	.init = dwmac4_dma_init,
-	.init_chan = dwmac4_dma_init_channel,
+	.init_chan = dwmac410_dma_init_channel,
 	.init_rx_chan = dwmac4_dma_init_rx_chan,
 	.init_tx_chan = dwmac4_dma_init_tx_chan,
 	.axi = dwmac4_dma_axi,

From dbbe7c962c3a8163bf724dbc3c9fdfc9b16d3117 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 2 Mar 2021 18:46:43 -0800
Subject: [PATCH 090/164] docs: networking: drop special stable handling

Leave it to Greg.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/netdev-FAQ.rst       | 72 ++-----------------
 Documentation/process/stable-kernel-rules.rst |  6 --
 Documentation/process/submitting-patches.rst  |  5 --
 3 files changed, 6 insertions(+), 77 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index a64c01b52b4c..91b2cf712801 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -142,73 +142,13 @@ Please send incremental versions on top of what has been merged in order to fix
 the patches the way they would look like if your latest patch series was to be
 merged.
 
-How can I tell what patches are queued up for backporting to the various stable releases?
------------------------------------------------------------------------------------------
-Normally Greg Kroah-Hartman collects stable commits himself, but for
-networking, Dave collects up patches he deems critical for the
-networking subsystem, and then hands them off to Greg.
-
-There is a patchworks queue that you can see here:
-
-  https://patchwork.kernel.org/bundle/netdev/stable/?state=*
-
-It contains the patches which Dave has selected, but not yet handed off
-to Greg.  If Greg already has the patch, then it will be here:
-
-  https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
-
-A quick way to find whether the patch is in this stable-queue is to
-simply clone the repo, and then git grep the mainline commit ID, e.g.
-::
-
-  stable-queue$ git grep -l 284041ef21fdf2e
-  releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-  releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-  releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-  stable/stable-queue$
-
-I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say?
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-No, not for networking.  Check the stable queues as per above first
-to see if it is already queued.  If not, then send a mail to netdev,
-listing the upstream commit ID and why you think it should be a stable
-candidate.
-
-Before you jump to go do the above, do note that the normal stable rules
-in :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
-still apply.  So you need to explicitly indicate why it is a critical
-fix and exactly what users are impacted.  In addition, you need to
-convince yourself that you *really* think it has been overlooked,
-vs. having been considered and rejected.
-
-Generally speaking, the longer it has had a chance to "soak" in
-mainline, the better the odds that it is an OK candidate for stable.  So
-scrambling to request a commit be added the day after it appears should
-be avoided.
-
-I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say?
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-No.  See above answer.  In short, if you think it really belongs in
-stable, then ensure you write a decent commit log that describes who
-gets impacted by the bug fix and how it manifests itself, and when the
-bug was introduced.  If you do that properly, then the commit will get
-handled appropriately and most likely get put in the patchworks stable
-queue if it really warrants it.
-
-If you think there is some valid information relating to it being in
-stable that does *not* belong in the commit log, then use the three dash
-marker line as described in
-:ref:`Documentation/process/submitting-patches.rst <the_canonical_patch_format>`
-to temporarily embed that information into the patch that you send.
-
-Are all networking bug fixes backported to all stable releases?
+Are there special rules regarding stable submissions on netdev?
 ---------------------------------------------------------------
-Due to capacity, Dave could only take care of the backports for the
-last two stable releases. For earlier stable releases, each stable
-branch maintainer is supposed to take care of them. If you find any
-patch is missing from an earlier stable branch, please notify
-stable@vger.kernel.org with either a commit ID or a formal patch
-backported, and CC Dave and other relevant networking developers.
+While it used to be the case that netdev submissions were not supposed
+to carry explicit ``CC: stable@vger.kernel.org`` tags that is no longer
+the case today. Please follow the standard stable rules in
+:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`,
+and make sure you include appropriate Fixes tags!
 
 Is the comment style convention different for the networking content?
 ---------------------------------------------------------------------
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 3973556250e1..003c865e9c21 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -35,12 +35,6 @@ Rules on what kind of patches are accepted, and which ones are not, into the
 Procedure for submitting patches to the -stable tree
 ----------------------------------------------------
 
- - If the patch covers files in net/ or drivers/net please follow netdev stable
-   submission guidelines as described in
-   :ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
-   after first checking the stable networking queue at
-   https://patchwork.kernel.org/bundle/netdev/stable/?state=*
-   to ensure the requested patch is not already queued up.
  - Security patches should not be handled (solely) by the -stable review
    process but should follow the procedures in
    :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`.
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 8c991c863628..91de63b201c1 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -250,11 +250,6 @@ should also read
 :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
 in addition to this file.
 
-Note, however, that some subsystem maintainers want to come to their own
-conclusions on which patches should go to the stable trees.  The networking
-maintainer, in particular, would rather not see individual developers
-adding lines like the above to their patches.
-
 If changes affect userland-kernel interfaces, please send the MAN-PAGES
 maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at
 least a notification of the change, so that some information makes its way

From b12422362ce947098ac420ac3c975fc006af4c02 Mon Sep 17 00:00:00 2001
From: Atish Patra <atish.patra@wdc.com>
Date: Wed, 3 Mar 2021 11:55:49 -0800
Subject: [PATCH 091/164] net: macb: Add default usrio config to default gem
 config

There is no usrio config defined for default gem config leading to
a kernel panic devices that don't define a data. This issue can be
reprdouced with microchip polar fire soc where compatible string
is defined as "cdns,macb".

Fixes: edac63861db7 ("add userio bits as platform configuration")

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 472bf8f220bc..15362d016a87 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3954,6 +3954,13 @@ static int macb_init(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct macb_usrio_config macb_default_usrio = {
+	.mii = MACB_BIT(MII),
+	.rmii = MACB_BIT(RMII),
+	.rgmii = GEM_BIT(RGMII),
+	.refclk = MACB_BIT(CLKEN),
+};
+
 #if defined(CONFIG_OF)
 /* 1518 rounded up */
 #define AT91ETHER_MAX_RBUFF_SZ	0x600
@@ -4439,13 +4446,6 @@ static int fu540_c000_init(struct platform_device *pdev)
 	return macb_init(pdev);
 }
 
-static const struct macb_usrio_config macb_default_usrio = {
-	.mii = MACB_BIT(MII),
-	.rmii = MACB_BIT(RMII),
-	.rgmii = GEM_BIT(RGMII),
-	.refclk = MACB_BIT(CLKEN),
-};
-
 static const struct macb_usrio_config sama7g5_usrio = {
 	.mii = 0,
 	.rmii = 1,
@@ -4594,6 +4594,7 @@ static const struct macb_config default_gem_config = {
 	.dma_burst_length = 16,
 	.clk_init = macb_clk_init,
 	.init = macb_init,
+	.usrio = &macb_default_usrio,
 	.jumbo_max_len = 10240,
 };
 

From 3e59e8856758eb5a2dfe1f831ef53b168fd58105 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Wed, 3 Mar 2021 16:50:49 +0100
Subject: [PATCH 092/164] net: l2tp: reduce log level of messages in receive
 path, add counter instead

Commit 5ee759cda51b ("l2tp: use standard API for warning log messages")
changed a number of warnings about invalid packets in the receive path
so that they are always shown, instead of only when a special L2TP debug
flag is set. Even with rate limiting these warnings can easily cause
significant log spam - potentially triggered by a malicious party
sending invalid packets on purpose.

In addition these warnings were noticed by projects like Tunneldigger [1],
which uses L2TP for its data path, but implements its own control
protocol (which is sufficiently different from L2TP data packets that it
would always be passed up to userspace even with future extensions of
L2TP).

Some of the warnings were already redundant, as l2tp_stats has a counter
for these packets. This commit adds one additional counter for invalid
packets that are passed up to userspace. Packets with unknown session are
not counted as invalid, as there is nothing wrong with the format of
these packets.

With the additional counter, all of these messages are either redundant
or benign, so we reduce them to pr_debug_ratelimited().

[1] https://github.com/wlanslovenija/tunneldigger/issues/160

Fixes: 5ee759cda51b ("l2tp: use standard API for warning log messages")
Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h |  1 +
 net/l2tp/l2tp_core.c      | 41 +++++++++++++++++++++------------------
 net/l2tp/l2tp_core.h      |  1 +
 net/l2tp/l2tp_netlink.c   |  6 ++++++
 4 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 30c80d5ba4bf..bab8c9708611 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -145,6 +145,7 @@ enum {
 	L2TP_ATTR_RX_ERRORS,		/* u64 */
 	L2TP_ATTR_STATS_PAD,
 	L2TP_ATTR_RX_COOKIE_DISCARDS,	/* u64 */
+	L2TP_ATTR_RX_INVALID,		/* u64 */
 	__L2TP_ATTR_STATS_MAX,
 };
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 7be5103ff2a8..203890e378cb 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -649,9 +649,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	/* Parse and check optional cookie */
 	if (session->peer_cookie_len > 0) {
 		if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
-			pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
-					    tunnel->name, tunnel->tunnel_id,
-					    session->session_id);
+			pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
+					     tunnel->name, tunnel->tunnel_id,
+					     session->session_id);
 			atomic_long_inc(&session->stats.rx_cookie_discards);
 			goto discard;
 		}
@@ -702,8 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 		 * If user has configured mandatory sequence numbers, discard.
 		 */
 		if (session->recv_seq) {
-			pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
-					    session->name);
+			pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+					     session->name);
 			atomic_long_inc(&session->stats.rx_seq_discards);
 			goto discard;
 		}
@@ -718,8 +718,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			session->send_seq = 0;
 			l2tp_session_set_header_len(session, tunnel->version);
 		} else if (session->send_seq) {
-			pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
-					    session->name);
+			pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+					     session->name);
 			atomic_long_inc(&session->stats.rx_seq_discards);
 			goto discard;
 		}
@@ -809,9 +809,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 
 	/* Short packet? */
 	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
-		pr_warn_ratelimited("%s: recv short packet (len=%d)\n",
-				    tunnel->name, skb->len);
-		goto error;
+		pr_debug_ratelimited("%s: recv short packet (len=%d)\n",
+				     tunnel->name, skb->len);
+		goto invalid;
 	}
 
 	/* Point to L2TP header */
@@ -824,9 +824,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 	/* Check protocol version */
 	version = hdrflags & L2TP_HDR_VER_MASK;
 	if (version != tunnel->version) {
-		pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
-				    tunnel->name, version, tunnel->version);
-		goto error;
+		pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
+				     tunnel->name, version, tunnel->version);
+		goto invalid;
 	}
 
 	/* Get length of L2TP packet */
@@ -834,7 +834,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 
 	/* If type is control packet, it is handled by userspace. */
 	if (hdrflags & L2TP_HDRFLAG_T)
-		goto error;
+		goto pass;
 
 	/* Skip flags */
 	ptr += 2;
@@ -863,21 +863,24 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 			l2tp_session_dec_refcount(session);
 
 		/* Not found? Pass to userspace to deal with */
-		pr_warn_ratelimited("%s: no session found (%u/%u). Passing up.\n",
-				    tunnel->name, tunnel_id, session_id);
-		goto error;
+		pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n",
+				     tunnel->name, tunnel_id, session_id);
+		goto pass;
 	}
 
 	if (tunnel->version == L2TP_HDR_VER_3 &&
 	    l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
-		goto error;
+		goto invalid;
 
 	l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
 	l2tp_session_dec_refcount(session);
 
 	return 0;
 
-error:
+invalid:
+	atomic_long_inc(&tunnel->stats.rx_invalid);
+
+pass:
 	/* Put UDP header back */
 	__skb_push(skb, sizeof(struct udphdr));
 
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index cb21d906343e..98ea98eb9567 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -39,6 +39,7 @@ struct l2tp_stats {
 	atomic_long_t		rx_oos_packets;
 	atomic_long_t		rx_errors;
 	atomic_long_t		rx_cookie_discards;
+	atomic_long_t		rx_invalid;
 };
 
 struct l2tp_tunnel;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 83956c9ee1fc..96eb91be9238 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -428,6 +428,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 			      L2TP_ATTR_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS,
 			      atomic_long_read(&tunnel->stats.rx_errors),
+			      L2TP_ATTR_STATS_PAD) ||
+	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID,
+			      atomic_long_read(&tunnel->stats.rx_invalid),
 			      L2TP_ATTR_STATS_PAD))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
@@ -771,6 +774,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 			      L2TP_ATTR_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS,
 			      atomic_long_read(&session->stats.rx_errors),
+			      L2TP_ATTR_STATS_PAD) ||
+	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID,
+			      atomic_long_read(&session->stats.rx_invalid),
 			      L2TP_ATTR_STATS_PAD))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);

From 4b5dc1a94d4f92b5845e98bd9ae344b26d933aad Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Wed, 3 Mar 2021 16:39:47 +0800
Subject: [PATCH 093/164] Revert "r8152: adjust the settings about MAC clock
 speed down for RTL8153"

This reverts commit 134f98bcf1b898fb9d6f2b91bc85dd2e5478b4b8.

The r8153_mac_clk_spd() is used for RTL8153A only, because the register
table of RTL8153B is different from RTL8153A. However, this function would
be called when RTL8153B calls r8153_first_init() and r8153_enter_oob().
That causes RTL8153B becomes unstable when suspending and resuming. The
worst case may let the device stop working.

Besides, revert this commit to disable MAC clock speed down for RTL8153A.
It would avoid the known issue when enabling U1. The data of the first
control transfer may be wrong when exiting U1.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 35 ++++++-----------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b246817f3405..90f1c0200042 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3021,29 +3021,6 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts)
 		device_set_wakeup_enable(&tp->udev->dev, false);
 }
 
-static void r8153_mac_clk_spd(struct r8152 *tp, bool enable)
-{
-	/* MAC clock speed down */
-	if (enable) {
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL,
-			       ALDPS_SPDWN_RATIO);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2,
-			       EEE_SPDWN_RATIO);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3,
-			       PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN |
-			       U1U2_SPDWN_EN | L1_SPDWN_EN);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4,
-			       PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN |
-			       TP100_SPDWN_EN | TP500_SPDWN_EN | EEE_SPDWN_EN |
-			       TP1000_SPDWN_EN);
-	} else {
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
-	}
-}
-
 static void r8153_u1u2en(struct r8152 *tp, bool enable)
 {
 	u8 u1u2[8];
@@ -3338,11 +3315,9 @@ static void rtl8153_runtime_enable(struct r8152 *tp, bool enable)
 	if (enable) {
 		r8153_u1u2en(tp, false);
 		r8153_u2p3en(tp, false);
-		r8153_mac_clk_spd(tp, true);
 		rtl_runtime_suspend_enable(tp, true);
 	} else {
 		rtl_runtime_suspend_enable(tp, false);
-		r8153_mac_clk_spd(tp, false);
 
 		switch (tp->version) {
 		case RTL_VER_03:
@@ -4718,7 +4693,6 @@ static void r8153_first_init(struct r8152 *tp)
 {
 	u32 ocp_data;
 
-	r8153_mac_clk_spd(tp, false);
 	rxdy_gated_en(tp, true);
 	r8153_teredo_off(tp);
 
@@ -4769,8 +4743,6 @@ static void r8153_enter_oob(struct r8152 *tp)
 {
 	u32 ocp_data;
 
-	r8153_mac_clk_spd(tp, true);
-
 	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
 	ocp_data &= ~NOW_IS_OOB;
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
@@ -5496,10 +5468,15 @@ static void r8153_init(struct r8152 *tp)
 
 	ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001);
 
+	/* MAC clock speed down */
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
+
 	r8153_power_cut_en(tp, false);
 	rtl_runtime_suspend_enable(tp, false);
 	r8153_u1u2en(tp, true);
-	r8153_mac_clk_spd(tp, false);
 	usb_enable_lpm(tp->udev);
 
 	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6);

From d65614a01d24704b016635abf5cc028a54e45a62 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Tue, 2 Mar 2021 17:19:32 +0800
Subject: [PATCH 094/164] net: 9p: advance iov on empty read

I met below warning when cating a small size(about 80bytes) txt file
on 9pfs(msize=2097152 is passed to 9p mount option), the reason is we
miss iov_iter_advance() if the read count is 0 for zerocopy case, so
we didn't truncate the pipe, then iov_iter_pipe() thinks the pipe is
full. Fix it by removing the exception for 0 to ensure to call
iov_iter_advance() even on empty read for zerocopy case.

[    8.279568] WARNING: CPU: 0 PID: 39 at lib/iov_iter.c:1203 iov_iter_pipe+0x31/0x40
[    8.280028] Modules linked in:
[    8.280561] CPU: 0 PID: 39 Comm: cat Not tainted 5.11.0+ #6
[    8.281260] RIP: 0010:iov_iter_pipe+0x31/0x40
[    8.281974] Code: 2b 42 54 39 42 5c 76 22 c7 07 20 00 00 00 48 89 57 18 8b 42 50 48 c7 47 08 b
[    8.283169] RSP: 0018:ffff888000cbbd80 EFLAGS: 00000246
[    8.283512] RAX: 0000000000000010 RBX: ffff888000117d00 RCX: 0000000000000000
[    8.283876] RDX: ffff88800031d600 RSI: 0000000000000000 RDI: ffff888000cbbd90
[    8.284244] RBP: ffff888000cbbe38 R08: 0000000000000000 R09: ffff8880008d2058
[    8.284605] R10: 0000000000000002 R11: ffff888000375510 R12: 0000000000000050
[    8.284964] R13: ffff888000cbbe80 R14: 0000000000000050 R15: ffff88800031d600
[    8.285439] FS:  00007f24fd8af600(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
[    8.285844] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    8.286150] CR2: 00007f24fd7d7b90 CR3: 0000000000c97000 CR4: 00000000000406b0
[    8.286710] Call Trace:
[    8.288279]  generic_file_splice_read+0x31/0x1a0
[    8.289273]  ? do_splice_to+0x2f/0x90
[    8.289511]  splice_direct_to_actor+0xcc/0x220
[    8.289788]  ? pipe_to_sendpage+0xa0/0xa0
[    8.290052]  do_splice_direct+0x8b/0xd0
[    8.290314]  do_sendfile+0x1ad/0x470
[    8.290576]  do_syscall_64+0x2d/0x40
[    8.290818]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[    8.291409] RIP: 0033:0x7f24fd7dca0a
[    8.292511] Code: c3 0f 1f 80 00 00 00 00 4c 89 d2 4c 89 c6 e9 bd fd ff ff 0f 1f 44 00 00 31 8
[    8.293360] RSP: 002b:00007ffc20932818 EFLAGS: 00000206 ORIG_RAX: 0000000000000028
[    8.293800] RAX: ffffffffffffffda RBX: 0000000001000000 RCX: 00007f24fd7dca0a
[    8.294153] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 0000000000000001
[    8.294504] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000
[    8.294867] R10: 0000000001000000 R11: 0000000000000206 R12: 0000000000000003
[    8.295217] R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000
[    8.295782] ---[ end trace 63317af81b3ca24b ]---

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/client.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 4f62f299da0c..0a9019da18f3 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1623,10 +1623,6 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
 	}
 
 	p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
-	if (!count) {
-		p9_tag_remove(clnt, req);
-		return 0;
-	}
 
 	if (non_zc) {
 		int n = copy_to_iter(dataptr, count, to);

From a9ecb0cbf03746b17a7c13bd8e3464e6789f73e8 Mon Sep 17 00:00:00 2001
From: zhang kai <zhangkaiheb@126.com>
Date: Tue, 2 Mar 2021 18:16:07 +0800
Subject: [PATCH 095/164] rtnetlink: using dev_base_seq from target net

Signed-off-by: zhang kai <zhangkaiheb@126.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0edc0b2baaa4..1bdcb33fb561 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2147,7 +2147,7 @@ out:
 out_err:
 	cb->args[1] = idx;
 	cb->args[0] = h;
-	cb->seq = net->dev_base_seq;
+	cb->seq = tgt_net->dev_base_seq;
 	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 	if (netnsid >= 0)
 		put_net(tgt_net);

From 2888b080d05c819205bbfe52c624a639f44c266a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 3 Mar 2021 23:58:27 +0100
Subject: [PATCH 096/164] netfilter: nftables: fix possible double hook
 unregistration with table owner

Skip hook unregistration of owner tables from the netns exit path,
nft_rcv_nl_event() unregisters the table hooks before tearing down
the table content.

Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b07703e19108..796ce86ef7eb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9028,8 +9028,12 @@ static void __nft_release_hooks(struct net *net)
 {
 	struct nft_table *table;
 
-	list_for_each_entry(table, &net->nft.tables, list)
+	list_for_each_entry(table, &net->nft.tables, list) {
+		if (nft_table_has_owner(table))
+			continue;
+
 		__nft_release_hook(net, table);
+	}
 }
 
 static void __nft_release_table(struct net *net, struct nft_table *table)

From bd1777b3a88f98e223392221b330668458aac7f1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Mar 2021 04:00:09 +0100
Subject: [PATCH 097/164] netfilter: nftables: bogus check for netlink portID
 with table owner

The existing branch checks for 0 != table->nlpid which always evaluates
true for tables that have an owner.

Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 796ce86ef7eb..224c8e537cb3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9083,13 +9083,12 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
 	nf_tables_table_destroy(&ctx);
 }
 
-static void __nft_release_tables(struct net *net, u32 nlpid)
+static void __nft_release_tables(struct net *net)
 {
 	struct nft_table *table, *nt;
 
 	list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
-		if (nft_table_has_owner(table) &&
-		    nlpid != table->nlpid)
+		if (nft_table_has_owner(table))
 			continue;
 
 		__nft_release_table(net, table);
@@ -9155,7 +9154,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 	mutex_lock(&net->nft.commit_mutex);
 	if (!list_empty(&net->nft.commit_list))
 		__nf_tables_abort(net, NFNL_ABORT_NONE);
-	__nft_release_tables(net, 0);
+	__nft_release_tables(net);
 	mutex_unlock(&net->nft.commit_mutex);
 	WARN_ON_ONCE(!list_empty(&net->nft.tables));
 	WARN_ON_ONCE(!list_empty(&net->nft.module_list));

From c95c34f01bbda4421c25fdc9b04a4a4aab10d36c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 3 Mar 2021 19:56:34 +0100
Subject: [PATCH 098/164] xsk: Remove dangling function declaration from header
 file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xdp_umem_query() is dead for a long time, drop the declaration from
include/linux/netdevice.h

Fixes: c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20210303185636.18070-2-maciej.fijalkowski@intel.com
---
 include/linux/netdevice.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f06fbee8638e..5b67ea89d5f2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3959,8 +3959,6 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
 
-int xdp_umem_query(struct net_device *dev, u16 queue_id);
-
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb);

From 6bc6699881012b5bd5d49fa861a69a37fc01b49c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 3 Mar 2021 19:56:35 +0100
Subject: [PATCH 099/164] samples, bpf: Add missing munmap in xdpsock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We mmap the umem region, but we never munmap it.
Add the missing call at the end of the cleanup.

Fixes: 3945b37a975d ("samples/bpf: use hugepages in xdpsock app")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20210303185636.18070-3-maciej.fijalkowski@intel.com
---
 samples/bpf/xdpsock_user.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index db0cb73513a5..1e2a1105d0e6 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1699,5 +1699,7 @@ int main(int argc, char **argv)
 
 	xdpsock_cleanup();
 
+	munmap(bufs, NUM_FRAMES * opt_xsk_frame_size);
+
 	return 0;
 }

From 2b2aedabc44e9660f90ccf7ba1ca2706d75f411f Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Wed, 3 Mar 2021 19:56:36 +0100
Subject: [PATCH 100/164] libbpf: Clear map_info before each
 bpf_obj_get_info_by_fd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xsk_lookup_bpf_maps, based on prog_fd, looks whether current prog has a
reference to XSKMAP. BPF prog can include insns that work on various BPF
maps and this is covered by iterating through map_ids.

The bpf_map_info that is passed to bpf_obj_get_info_by_fd for filling
needs to be cleared at each iteration, so that it doesn't contain any
outdated fields and that is currently missing in the function of
interest.

To fix that, zero-init map_info via memset before each
bpf_obj_get_info_by_fd call.

Also, since the area of this code is touched, in general strcmp is
considered harmful, so let's convert it to strncmp and provide the
size of the array name for current map_info.

While at it, do s/continue/break/ once we have found the xsks_map to
terminate the search.

Fixes: 5750902a6e9b ("libbpf: proper XSKMAP cleanup")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20210303185636.18070-4-maciej.fijalkowski@intel.com
---
 tools/lib/bpf/xsk.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index ffbb588724d8..526fc35c0b23 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -610,15 +610,16 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 		if (fd < 0)
 			continue;
 
+		memset(&map_info, 0, map_len);
 		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
 		if (err) {
 			close(fd);
 			continue;
 		}
 
-		if (!strcmp(map_info.name, "xsks_map")) {
+		if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
 			ctx->xsks_map_fd = fd;
-			continue;
+			break;
 		}
 
 		close(fd);

From 83a2881903f3d5bc08ded4fb04f6e3bedb1fba65 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Mon, 1 Mar 2021 16:40:19 +0100
Subject: [PATCH 101/164] bpf: Account for BPF_FETCH in insn_has_def32()

insn_has_def32() returns false for 32-bit BPF_FETCH insns. This makes
adjust_insn_aux_data() incorrectly set zext_dst, as can be seen in [1].
This happens because insn_no_def() does not know about the BPF_FETCH
variants of BPF_STX.

Fix in two steps.

First, replace insn_no_def() with insn_def_regno(), which returns the
register an insn defines. Normally insn_no_def() calls are followed by
insn->dst_reg uses; replace those with the insn_def_regno() return
value.

Second, adjust the BPF_STX special case in is_reg64() to deal with
queries made from opt_subreg_zext_lo32_rnd_hi32(), where the state
information is no longer available. Add a comment, since the purpose
of this special case is not clear at first glance.

  [1] https://lore.kernel.org/bpf/20210223150845.1857620-1-jackmanb@google.com/

Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Brendan Jackman <jackmanb@google.com>
Link: https://lore.kernel.org/bpf/20210301154019.129110-1-iii@linux.ibm.com
---
 kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 31 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3d34ba492d46..bb3eaab934f3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1703,7 +1703,11 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 
 	if (class == BPF_STX) {
-		if (reg->type != SCALAR_VALUE)
+		/* BPF_STX (including atomic variants) has multiple source
+		 * operands, one of which is a ptr. Check whether the caller is
+		 * asking about it.
+		 */
+		if (t == SRC_OP && reg->type != SCALAR_VALUE)
 			return true;
 		return BPF_SIZE(code) == BPF_DW;
 	}
@@ -1735,22 +1739,38 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return true;
 }
 
-/* Return TRUE if INSN doesn't have explicit value define. */
-static bool insn_no_def(struct bpf_insn *insn)
+/* Return the regno defined by the insn, or -1. */
+static int insn_def_regno(const struct bpf_insn *insn)
 {
-	u8 class = BPF_CLASS(insn->code);
-
-	return (class == BPF_JMP || class == BPF_JMP32 ||
-		class == BPF_STX || class == BPF_ST);
+	switch (BPF_CLASS(insn->code)) {
+	case BPF_JMP:
+	case BPF_JMP32:
+	case BPF_ST:
+		return -1;
+	case BPF_STX:
+		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+		    (insn->imm & BPF_FETCH)) {
+			if (insn->imm == BPF_CMPXCHG)
+				return BPF_REG_0;
+			else
+				return insn->src_reg;
+		} else {
+			return -1;
+		}
+	default:
+		return insn->dst_reg;
+	}
 }
 
 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	if (insn_no_def(insn))
+	int dst_reg = insn_def_regno(insn);
+
+	if (dst_reg == -1)
 		return false;
 
-	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
+	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
 }
 
 static void mark_insn_zext(struct bpf_verifier_env *env,
@@ -11006,9 +11026,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 	for (i = 0; i < len; i++) {
 		int adj_idx = i + delta;
 		struct bpf_insn insn;
-		u8 load_reg;
+		int load_reg;
 
 		insn = insns[adj_idx];
+		load_reg = insn_def_regno(&insn);
 		if (!aux[adj_idx].zext_dst) {
 			u8 code, class;
 			u32 imm_rnd;
@@ -11018,14 +11039,14 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 
 			code = insn.code;
 			class = BPF_CLASS(code);
-			if (insn_no_def(&insn))
+			if (load_reg == -1)
 				continue;
 
 			/* NOTE: arg "reg" (the fourth one) is only used for
-			 *       BPF_STX which has been ruled out in above
-			 *       check, it is safe to pass NULL here.
+			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
+			 *       here.
 			 */
-			if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
+			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
 				if (class == BPF_LD &&
 				    BPF_MODE(code) == BPF_IMM)
 					i++;
@@ -11040,7 +11061,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 			imm_rnd = get_random_int();
 			rnd_hi32_patch[0] = insn;
 			rnd_hi32_patch[1].imm = imm_rnd;
-			rnd_hi32_patch[3].dst_reg = insn.dst_reg;
+			rnd_hi32_patch[3].dst_reg = load_reg;
 			patch = rnd_hi32_patch;
 			patch_len = 4;
 			goto apply_patch_buffer;
@@ -11049,22 +11070,9 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 		if (!bpf_jit_needs_zext())
 			continue;
 
-		/* zext_dst means that we want to zero-extend whatever register
-		 * the insn defines, which is dst_reg most of the time, with
-		 * the notable exception of BPF_STX + BPF_ATOMIC + BPF_FETCH.
-		 */
-		if (BPF_CLASS(insn.code) == BPF_STX &&
-		    BPF_MODE(insn.code) == BPF_ATOMIC) {
-			/* BPF_STX + BPF_ATOMIC insns without BPF_FETCH do not
-			 * define any registers, therefore zext_dst cannot be
-			 * set.
-			 */
-			if (WARN_ON(!(insn.imm & BPF_FETCH)))
-				return -EINVAL;
-			load_reg = insn.imm == BPF_CMPXCHG ? BPF_REG_0
-							   : insn.src_reg;
-		} else {
-			load_reg = insn.dst_reg;
+		if (WARN_ON(load_reg == -1)) {
+			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
+			return -EFAULT;
 		}
 
 		zext_patch[0] = insn;

From d785e1fec60179f534fbe8d006c890e5ad186e51 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony@phenome.org>
Date: Wed, 14 Oct 2020 16:17:48 +0200
Subject: [PATCH 102/164] ixgbe: fail to create xfrm offload of IPsec tunnel
 mode SA

Based on talks and indirect references ixgbe IPsec offlod do not
support IPsec tunnel mode offload. It can only support IPsec transport
mode offload. Now explicitly fail when creating non transport mode SA
with offload to avoid false performance expectations.

Fixes: 63a67fe229ea ("ixgbe: add ipsec offload add and remove SA")
Signed-off-by: Antony Antony <antony@phenome.org>
Acked-by: Shannon Nelson <snelson@pensando.io>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 5 +++++
 drivers/net/ethernet/intel/ixgbevf/ipsec.c     | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index eca73526ac86..54d47265a7ac 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -575,6 +575,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
 		return -EINVAL;
 	}
 
+	if (xs->props.mode != XFRM_MODE_TRANSPORT) {
+		netdev_err(dev, "Unsupported mode for ipsec offload\n");
+		return -EINVAL;
+	}
+
 	if (ixgbe_ipsec_check_mgmt_ip(xs)) {
 		netdev_err(dev, "IPsec IP addr clash with mgmt filters\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
index 5170dd9d8705..caaea2c920a6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
@@ -272,6 +272,11 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs)
 		return -EINVAL;
 	}
 
+	if (xs->props.mode != XFRM_MODE_TRANSPORT) {
+		netdev_err(dev, "Unsupported mode for ipsec offload\n");
+		return -EINVAL;
+	}
+
 	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
 		struct rx_sa rsa;
 

From 7a766381634da19fc837619b0a34590498d9d29a Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Sun, 3 Jan 2021 16:08:42 +0800
Subject: [PATCH 103/164] ixgbe: Fix memleak in ixgbe_configure_clsu32

When ixgbe_fdir_write_perfect_filter_82599() fails,
input allocated by kzalloc() has not been freed,
which leads to memleak.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index fae84202d870..9f3f12e2ccf2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9565,8 +9565,10 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
 	ixgbe_atr_compute_perfect_hash_82599(&input->filter, mask);
 	err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter,
 						    input->sw_idx, queue);
-	if (!err)
-		ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+	if (err)
+		goto err_out_w_lock;
+
+	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
 	spin_unlock(&adapter->fdir_perfect_lock);
 
 	if ((uhtid != 0x800) && (adapter->jump_tables[uhtid]))

From d93ef301644ee82925bce1d57fdfe70475dc0bae Mon Sep 17 00:00:00 2001
From: Drew Fustini <drew@beagleboard.org>
Date: Wed, 3 Mar 2021 21:55:49 -0800
Subject: [PATCH 104/164] net: sctp: trivial: fix typo in comment

Fix typo of 'overflow' for comment in sctp_tsnmap_check().

Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Drew Fustini <drew@beagleboard.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/tsnmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index a9c6af5795d8..5ba456727f63 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -75,7 +75,7 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
 		return 1;
 
 	/* Verify that we can hold this TSN and that it will not
-	 * overlfow our map
+	 * overflow our map
 	 */
 	if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE))
 		return -1;

From 76c03bf8e2624076b88d93542d78e22d5345c88e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 4 Mar 2021 10:57:53 +0200
Subject: [PATCH 105/164] nexthop: Do not flush blackhole nexthops when
 loopback goes down

As far as user space is concerned, blackhole nexthops do not have a
nexthop device and therefore should not be affected by the
administrative or carrier state of any netdev.

However, when the loopback netdev goes down all the blackhole nexthops
are flushed. This happens because internally the kernel associates
blackhole nexthops with the loopback netdev.

This behavior is both confusing to those not familiar with kernel
internals and also diverges from the legacy API where blackhole IPv4
routes are not flushed when the loopback netdev goes down:

 # ip route add blackhole 198.51.100.0/24
 # ip link set dev lo down
 # ip route show 198.51.100.0/24
 blackhole 198.51.100.0/24

Blackhole IPv6 routes are flushed, but at least user space knows that
they are associated with the loopback netdev:

 # ip -6 route show 2001:db8:1::/64
 blackhole 2001:db8:1::/64 dev lo metric 1024 pref medium

Fix this by only flushing blackhole nexthops when the loopback netdev is
unregistered.

Fixes: ab84be7e54fc ("net: Initial nexthop code")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reported-by: Donald Sharp <sharpd@nvidia.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f1c6cbdb9e43..743777bce179 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1399,7 +1399,7 @@ out:
 
 /* rtnl */
 /* remove all nexthops tied to a device being deleted */
-static void nexthop_flush_dev(struct net_device *dev)
+static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
 {
 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
 	struct net *net = dev_net(dev);
@@ -1411,6 +1411,10 @@ static void nexthop_flush_dev(struct net_device *dev)
 		if (nhi->fib_nhc.nhc_dev != dev)
 			continue;
 
+		if (nhi->reject_nh &&
+		    (event == NETDEV_DOWN || event == NETDEV_CHANGE))
+			continue;
+
 		remove_nexthop(net, nhi->nh_parent, NULL);
 	}
 }
@@ -2189,11 +2193,11 @@ static int nh_netdev_event(struct notifier_block *this,
 	switch (event) {
 	case NETDEV_DOWN:
 	case NETDEV_UNREGISTER:
-		nexthop_flush_dev(dev);
+		nexthop_flush_dev(dev, event);
 		break;
 	case NETDEV_CHANGE:
 		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
-			nexthop_flush_dev(dev);
+			nexthop_flush_dev(dev, event);
 		break;
 	case NETDEV_CHANGEMTU:
 		info_ext = ptr;

From 3a1099d3147f391fa11320a759bbcb1bb857fca1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 4 Mar 2021 10:57:54 +0200
Subject: [PATCH 106/164] selftests: fib_nexthops: Test blackhole nexthops when
 loopback goes down

Test that blackhole nexthops are not flushed when the loopback device
goes down.

Output without previous patch:

 # ./fib_nexthops.sh -t basic

 Basic functional tests
 ----------------------
 TEST: List with nothing defined                                     [ OK ]
 TEST: Nexthop get on non-existent id                                [ OK ]
 TEST: Nexthop with no device or gateway                             [ OK ]
 TEST: Nexthop with down device                                      [ OK ]
 TEST: Nexthop with device that is linkdown                          [ OK ]
 TEST: Nexthop with device only                                      [ OK ]
 TEST: Nexthop with duplicate id                                     [ OK ]
 TEST: Blackhole nexthop                                             [ OK ]
 TEST: Blackhole nexthop with other attributes                       [ OK ]
 TEST: Blackhole nexthop with loopback device down                   [FAIL]
 TEST: Create group                                                  [ OK ]
 TEST: Create group with blackhole nexthop                           [FAIL]
 TEST: Create multipath group where 1 path is a blackhole            [ OK ]
 TEST: Multipath group can not have a member replaced by blackhole   [ OK ]
 TEST: Create group with non-existent nexthop                        [ OK ]
 TEST: Create group with same nexthop multiple times                 [ OK ]
 TEST: Replace nexthop with nexthop group                            [ OK ]
 TEST: Replace nexthop group with nexthop                            [ OK ]
 TEST: Nexthop group and device                                      [ OK ]
 TEST: Test proto flush                                              [ OK ]
 TEST: Nexthop group and blackhole                                   [ OK ]

 Tests passed:  19
 Tests failed:   2

Output with previous patch:

 # ./fib_nexthops.sh -t basic

 Basic functional tests
 ----------------------
 TEST: List with nothing defined                                     [ OK ]
 TEST: Nexthop get on non-existent id                                [ OK ]
 TEST: Nexthop with no device or gateway                             [ OK ]
 TEST: Nexthop with down device                                      [ OK ]
 TEST: Nexthop with device that is linkdown                          [ OK ]
 TEST: Nexthop with device only                                      [ OK ]
 TEST: Nexthop with duplicate id                                     [ OK ]
 TEST: Blackhole nexthop                                             [ OK ]
 TEST: Blackhole nexthop with other attributes                       [ OK ]
 TEST: Blackhole nexthop with loopback device down                   [ OK ]
 TEST: Create group                                                  [ OK ]
 TEST: Create group with blackhole nexthop                           [ OK ]
 TEST: Create multipath group where 1 path is a blackhole            [ OK ]
 TEST: Multipath group can not have a member replaced by blackhole   [ OK ]
 TEST: Create group with non-existent nexthop                        [ OK ]
 TEST: Create group with same nexthop multiple times                 [ OK ]
 TEST: Replace nexthop with nexthop group                            [ OK ]
 TEST: Replace nexthop group with nexthop                            [ OK ]
 TEST: Nexthop group and device                                      [ OK ]
 TEST: Test proto flush                                              [ OK ]
 TEST: Nexthop group and blackhole                                   [ OK ]

 Tests passed:  21
 Tests failed:   0

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 4c7d33618437..d98fb85e201c 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1524,6 +1524,14 @@ basic()
 	run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
 	log_test $? 2 "Blackhole nexthop with other attributes"
 
+	# blackhole nexthop should not be affected by the state of the loopback
+	# device
+	run_cmd "$IP link set dev lo down"
+	check_nexthop "id 2" "id 2 blackhole"
+	log_test $? 0 "Blackhole nexthop with loopback device down"
+
+	run_cmd "$IP link set dev lo up"
+
 	#
 	# groups
 	#

From f1becbed411c6fa29d7ce3def3a1dcd4f63f2d74 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 4 Mar 2021 12:29:43 +0200
Subject: [PATCH 107/164] net: mscc: ocelot: properly reject destination IP
 keys in VCAP IS1

An attempt is made to warn the user about the fact that VCAP IS1 cannot
offload keys matching on destination IP (at least given the current half
key format), but sadly that warning fails miserably in practice, due to
the fact that it operates on an uninitialized "match" variable. We must
first decode the keys from the flow rule.

Fixes: 75944fda1dfe ("net: mscc: ocelot: offload ingress skbedit and vlan actions to VCAP IS1")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_flower.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index c3ac026f6aea..a41b458b1b3e 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -540,13 +540,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
 			return -EOPNOTSUPP;
 		}
 
+		flow_rule_match_ipv4_addrs(rule, &match);
+
 		if (filter->block_id == VCAP_IS1 && *(u32 *)&match.mask->dst) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Key type S1_NORMAL cannot match on destination IP");
 			return -EOPNOTSUPP;
 		}
 
-		flow_rule_match_ipv4_addrs(rule, &match);
 		tmp = &filter->key.ipv4.sip.value.addr[0];
 		memcpy(tmp, &match.key->src, 4);
 

From 053d8ad10d585adf9891fcd049637536e2fe9ea7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 4 Mar 2021 12:56:53 +0200
Subject: [PATCH 108/164] net: dsa: sja1105: fix SGMII PCS being forced to
 SPEED_UNKNOWN instead of SPEED_10

When using MLO_AN_PHY or MLO_AN_FIXED, the MII_BMCR of the SGMII PCS is
read before resetting the switch so it can be reprogrammed afterwards.
This works for the speeds of 1Gbps and 100Mbps, but not for 10Mbps,
because SPEED_10 is actually 0, so AND-ing anything with 0 is false,
therefore that last branch is dead code.

Do what others do (genphy_read_status_fixed, phy_mii_ioctl) and just
remove the check for SPEED_10, let it fall into the default case.

Fixes: ffe10e679cec ("net: dsa: sja1105: Add support for the SGMII port")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 7692338730df..c1982615c631 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1922,7 +1922,7 @@ out_unlock_ptp:
 				speed = SPEED_1000;
 			else if (bmcr & BMCR_SPEED100)
 				speed = SPEED_100;
-			else if (bmcr & BMCR_SPEED10)
+			else
 				speed = SPEED_10;
 
 			sja1105_sgmii_pcs_force_speed(priv, speed);

From 6a5166e07c029182ee0e15c1a97b08c3179b2aaf Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 4 Mar 2021 12:56:54 +0200
Subject: [PATCH 109/164] net: dsa: sja1105: fix ucast/bcast flooding always
 remaining enabled

In the blamed patch I managed to introduce a bug while moving code
around: the same logic is applied to the ucast_egress_floods and
bcast_egress_floods variables both on the "if" and the "else" branches.

This is clearly an unintended change compared to how the code used to be
prior to that bugfix, so restore it.

Fixes: 7f7ccdea8c73 ("net: dsa: sja1105: fix leakage of flooded frames outside bridging domain")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index c1982615c631..51ea104c63bb 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3369,14 +3369,14 @@ static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
 		if (flags.val & BR_FLOOD)
 			priv->ucast_egress_floods |= BIT(to);
 		else
-			priv->ucast_egress_floods |= BIT(to);
+			priv->ucast_egress_floods &= ~BIT(to);
 	}
 
 	if (flags.mask & BR_BCAST_FLOOD) {
 		if (flags.val & BR_BCAST_FLOOD)
 			priv->bcast_egress_floods |= BIT(to);
 		else
-			priv->bcast_egress_floods |= BIT(to);
+			priv->bcast_egress_floods &= ~BIT(to);
 	}
 
 	return sja1105_manage_flood_domains(priv);

From 6c59cff38e66584ae3ac6c2f0cbd8d039c710ba7 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Thu, 4 Mar 2021 14:15:13 +0100
Subject: [PATCH 110/164] net: usb: qmi_wwan: allow qmimux add/del with master
 up
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's no reason for preventing the creation and removal
of qmimux network interfaces when the underlying interface
is up.

This makes qmi_wwan mux implementation more similar to the
rmnet one, simplifying userspace management of the same
logical interfaces.

Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support")
Reported-by: Aleksander Morgado <aleksander@aleksander.es>
Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 17a050521b86..6700f1970b24 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -429,13 +429,6 @@ static ssize_t add_mux_store(struct device *d,  struct device_attribute *attr, c
 		goto err;
 	}
 
-	/* we don't want to modify a running netdev */
-	if (netif_running(dev->net)) {
-		netdev_err(dev->net, "Cannot change a running device\n");
-		ret = -EBUSY;
-		goto err;
-	}
-
 	ret = qmimux_register_device(dev->net, mux_id);
 	if (!ret) {
 		info->flags |= QMI_WWAN_FLAG_MUX;
@@ -465,13 +458,6 @@ static ssize_t del_mux_store(struct device *d,  struct device_attribute *attr, c
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	/* we don't want to modify a running netdev */
-	if (netif_running(dev->net)) {
-		netdev_err(dev->net, "Cannot change a running device\n");
-		ret = -EBUSY;
-		goto err;
-	}
-
 	del_dev = qmimux_find_dev(dev, mux_id);
 	if (!del_dev) {
 		netdev_err(dev->net, "mux_id not present\n");

From bfc2560563586372212b0a8aeca7428975fa91fe Mon Sep 17 00:00:00 2001
From: Maximilian Heyne <mheyne@amazon.de>
Date: Thu, 4 Mar 2021 14:43:17 +0000
Subject: [PATCH 111/164] net: sched: avoid duplicates in classes dump

This is a follow up of commit ea3274695353 ("net: sched: avoid
duplicates in qdisc dump") which has fixed the issue only for the qdisc
dump.

The duplicate printing also occurs when dumping the classes via
  tc class show dev eth0

Fixes: 59cc1f61f09c ("net: sched: convert qdisc linked list to hashtable")
Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e2e4353db8a7..f87d07736a14 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2168,7 +2168,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
 
 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 			       struct tcmsg *tcm, struct netlink_callback *cb,
-			       int *t_p, int s_t)
+			       int *t_p, int s_t, bool recur)
 {
 	struct Qdisc *q;
 	int b;
@@ -2179,7 +2179,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
 		return -1;
 
-	if (!qdisc_dev(root))
+	if (!qdisc_dev(root) || !recur)
 		return 0;
 
 	if (tcm->tcm_parent) {
@@ -2214,13 +2214,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
+	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
 		goto done;
 
 	dev_queue = dev_ingress_queue(dev);
 	if (dev_queue &&
 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
-				&t, s_t) < 0)
+				&t, s_t, false) < 0)
 		goto done;
 
 done:

From e0be4931f3fee2e04dec4013ea4f27ec2db8556f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 4 Mar 2021 13:32:08 -0800
Subject: [PATCH 112/164] mptcp: reset last_snd on subflow close

Send logic caches last active subflow in the msk, so it needs to be
cleared when the cached subflow is closed.

Fixes: d5f49190def61c ("mptcp: allow picking different xmit subflows")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/155
Reported-by: Christoph Paasch <cpaasch@apple.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c5d5e68940ea..7362a536cbc0 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2126,6 +2126,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 			      struct mptcp_subflow_context *subflow)
 {
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
 	list_del(&subflow->node);
 
 	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2154,6 +2156,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	release_sock(ssk);
 
 	sock_put(ssk);
+
+	if (ssk == msk->last_snd)
+		msk->last_snd = NULL;
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,

From f07157792c633b528de5fc1dbe2e4ea54f8e09d4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 4 Mar 2021 13:32:09 -0800
Subject: [PATCH 113/164] mptcp: put subflow sock on connect error

mptcp_add_pending_subflow() performs a sock_hold() on the subflow,
then adds the subflow to the join list.

Without a sock_put the subflow sk won't be freed in case connect() fails.

unreferenced object 0xffff88810c03b100 (size 3000):
[..]
    sk_prot_alloc.isra.0+0x2f/0x110
    sk_alloc+0x5d/0xc20
    inet6_create+0x2b7/0xd30
    __sock_create+0x17f/0x410
    mptcp_subflow_create_socket+0xff/0x9c0
    __mptcp_subflow_connect+0x1da/0xaf0
    mptcp_pm_nl_work+0x6e0/0x1120
    mptcp_worker+0x508/0x9a0

Fixes: 5b950ff4331ddda ("mptcp: link MPC subflow into msk only after accept")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e1fbcab257e6..41695e26c374 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1297,6 +1297,7 @@ failed_unlink:
 	spin_lock_bh(&msk->join_list_lock);
 	list_del(&subflow->node);
 	spin_unlock_bh(&msk->join_list_lock);
+	sock_put(mptcp_subflow_tcp_sock(subflow));
 
 failed:
 	subflow->disposable = 1;

From eaeef1ce55ec9161e0c44ff27017777b1644b421 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 4 Mar 2021 13:32:10 -0800
Subject: [PATCH 114/164] mptcp: fix memory accounting on allocation error

In case of memory pressure the MPTCP xmit path keeps
at most a single skb in the tx cache, eventually freeing
additional ones.

The associated counter for forward memory is not update
accordingly, and that causes the following splat:

WARNING: CPU: 0 PID: 12 at net/core/stream.c:208 sk_stream_kill_queues+0x3ca/0x530 net/core/stream.c:208
Modules linked in:
CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.11.0-rc2 #59
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Workqueue: events mptcp_worker
RIP: 0010:sk_stream_kill_queues+0x3ca/0x530 net/core/stream.c:208
Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e 63 01 00 00 8b ab 00 01 00 00 e9 60 ff ff ff e8 2f 24 d3 fe 0f 0b eb 97 e8 26 24 d3 fe <0f> 0b eb a0 e8 1d 24 d3 fe 0f 0b e9 a5 fe ff ff 4c 89 e7 e8 0e d0
RSP: 0018:ffffc900000c7bc8 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: ffff88810030ac40 RSI: ffffffff8262ca4a RDI: 0000000000000003
RBP: 0000000000000d00 R08: 0000000000000000 R09: ffffffff85095aa7
R10: ffffffff8262c9ea R11: 0000000000000001 R12: ffff888108908100
R13: ffffffff85095aa0 R14: ffffc900000c7c48 R15: 1ffff92000018f85
FS:  0000000000000000(0000) GS:ffff88811b200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa7444baef8 CR3: 0000000035ee9005 CR4: 0000000000170ef0
Call Trace:
 __mptcp_destroy_sock+0x4a7/0x6c0 net/mptcp/protocol.c:2547
 mptcp_worker+0x7dd/0x1610 net/mptcp/protocol.c:2272
 process_one_work+0x896/0x1170 kernel/workqueue.c:2275
 worker_thread+0x605/0x1350 kernel/workqueue.c:2421
 kthread+0x344/0x410 kernel/kthread.c:292
 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:296

At close time, as reported by syzkaller/Christoph.

This change address the issue properly updating the fwd
allocated memory counter in the error path.

Reported-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/136
Fixes: 724cfd2ee8aa ("mptcp: allocate TX skbs in msk context")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7362a536cbc0..aa59101ffe54 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1189,6 +1189,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size,
 			 */
 			while (skbs->qlen > 1) {
 				skb = __skb_dequeue_tail(skbs);
+				*total_ts -= skb->truesize;
 				__kfree_skb(skb);
 			}
 			return skbs->qlen > 0;

From 17aee05dc8822e354f5ad2d68ee39e3ba4b6acf2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 4 Mar 2021 13:32:11 -0800
Subject: [PATCH 115/164] mptcp: dispose initial struct socket when its subflow
 is closed

Christoph Paasch reported following crash:
dst_release underflow
WARNING: CPU: 0 PID: 1319 at net/core/dst.c:175 dst_release+0xc1/0xd0 net/core/dst.c:175
CPU: 0 PID: 1319 Comm: syz-executor217 Not tainted 5.11.0-rc6af8e85128b4d0d24083c5cac646e891227052e0c #70
Call Trace:
 rt_cache_route+0x12e/0x140 net/ipv4/route.c:1503
 rt_set_nexthop.constprop.0+0x1fc/0x590 net/ipv4/route.c:1612
 __mkroute_output net/ipv4/route.c:2484 [inline]
...

The worker leaves msk->subflow alone even when it
happened to close the subflow ssk associated with it.

Fixes: 866f26f2a9c33b ("mptcp: always graft subflow socket to parent")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/157
Reported-by: Christoph Paasch <cpaasch@apple.com>
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index aa59101ffe54..a58da04bed71 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2116,6 +2116,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 	return backup;
 }
 
+static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
+{
+	if (msk->subflow) {
+		iput(SOCK_INODE(msk->subflow));
+		msk->subflow = NULL;
+	}
+}
+
 /* subflow sockets can be either outgoing (connect) or incoming
  * (accept).
  *
@@ -2160,6 +2168,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
 	if (ssk == msk->last_snd)
 		msk->last_snd = NULL;
+
+	if (msk->subflow && ssk == msk->subflow->sk)
+		mptcp_dispose_initial_subflow(msk);
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2529,12 +2540,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
 	might_sleep();
 
-	/* dispose the ancillatory tcp socket, if any */
-	if (msk->subflow) {
-		iput(SOCK_INODE(msk->subflow));
-		msk->subflow = NULL;
-	}
-
 	/* be sure to always acquire the join list lock, to sync vs
 	 * mptcp_finish_join().
 	 */
@@ -2559,6 +2564,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sk_stream_kill_queues(sk);
 	xfrm_sk_free_policy(sk);
 	sk_refcnt_debug_release(sk);
+	mptcp_dispose_initial_subflow(msk);
 	sock_put(sk);
 }
 

From c8fe62f0768cc9378103fc89fb96804645f527c9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 4 Mar 2021 13:32:12 -0800
Subject: [PATCH 116/164] mptcp: reset 'first' and ack_hint on subflow close

Just like with last_snd, we have to NULL 'first' on subflow close.

ack_hint isn't strictly required (its never dereferenced), but better to
clear this explicitly as well instead of making it an exception.

msk->first is dereferenced unconditionally at accept time, but
at that point the ssk is not on the conn_list yet -- this means
worker can't see it when iterating the conn_list.

Reported-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a58da04bed71..3dcb564b03ad 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2169,6 +2169,12 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	if (ssk == msk->last_snd)
 		msk->last_snd = NULL;
 
+	if (ssk == msk->ack_hint)
+		msk->ack_hint = NULL;
+
+	if (ssk == msk->first)
+		msk->first = NULL;
+
 	if (msk->subflow && ssk == msk->subflow->sk)
 		mptcp_dispose_initial_subflow(msk);
 }
@@ -3297,6 +3303,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 		/* PM/worker can now acquire the first subflow socket
 		 * lock without racing with listener queue cleanup,
 		 * we can notify it, if needed.
+		 *
+		 * Even if remote has reset the initial subflow by now
+		 * the refcnt is still at least one.
 		 */
 		subflow = mptcp_subflow_ctx(msk->first);
 		list_add(&subflow->node, &msk->conn_list);

From 2948d0a1e5aedc789fed27a4473040b6db741426 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 4 Mar 2021 13:32:13 -0800
Subject: [PATCH 117/164] mptcp: factor out __mptcp_retrans helper()

Will simplify the following patch, no functional change
intended.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 93 ++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3dcb564b03ad..67aaf7154dca 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2261,14 +2261,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 	mptcp_close_wake_up(sk);
 }
 
-static void mptcp_worker(struct work_struct *work)
+static void __mptcp_retrans(struct sock *sk)
 {
-	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
-	struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
+	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_sendmsg_info info = {};
 	struct mptcp_data_frag *dfrag;
 	size_t copied = 0;
-	int state, ret;
+	struct sock *ssk;
+	int ret;
+
+	__mptcp_clean_una(sk);
+	dfrag = mptcp_rtx_head(sk);
+	if (!dfrag)
+		return;
+
+	ssk = mptcp_subflow_get_retrans(msk);
+	if (!ssk)
+		goto reset_timer;
+
+	lock_sock(ssk);
+
+	/* limit retransmission to the bytes already sent on some subflows */
+	info.sent = 0;
+	info.limit = dfrag->already_sent;
+	while (info.sent < dfrag->already_sent) {
+		if (!mptcp_alloc_tx_skb(sk, ssk))
+			break;
+
+		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+		if (ret <= 0)
+			break;
+
+		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
+		copied += ret;
+		info.sent += ret;
+	}
+	if (copied)
+		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+			 info.size_goal);
+
+	mptcp_set_timeout(sk, ssk);
+	release_sock(ssk);
+
+reset_timer:
+	if (!mptcp_timer_pending(sk))
+		mptcp_reset_timer(sk);
+}
+
+static void mptcp_worker(struct work_struct *work)
+{
+	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
+	struct sock *sk = &msk->sk.icsk_inet.sk;
+	int state;
 
 	lock_sock(sk);
 	state = sk->sk_state;
@@ -2303,45 +2347,8 @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(msk);
 
-	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
-		goto unlock;
-
-	__mptcp_clean_una(sk);
-	dfrag = mptcp_rtx_head(sk);
-	if (!dfrag)
-		goto unlock;
-
-	ssk = mptcp_subflow_get_retrans(msk);
-	if (!ssk)
-		goto reset_unlock;
-
-	lock_sock(ssk);
-
-	/* limit retransmission to the bytes already sent on some subflows */
-	info.sent = 0;
-	info.limit = dfrag->already_sent;
-	while (info.sent < dfrag->already_sent) {
-		if (!mptcp_alloc_tx_skb(sk, ssk))
-			break;
-
-		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
-		if (ret <= 0)
-			break;
-
-		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
-		copied += ret;
-		info.sent += ret;
-	}
-	if (copied)
-		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
-			 info.size_goal);
-
-	mptcp_set_timeout(sk, ssk);
-	release_sock(ssk);
-
-reset_unlock:
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
+		__mptcp_retrans(sk);
 
 unlock:
 	release_sock(sk);

From c2e6048fa1cf2228063aec299f93ac6eb256b457 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 4 Mar 2021 13:32:14 -0800
Subject: [PATCH 118/164] mptcp: fix race in release_cb

If we receive a MPTCP_PUSH_PENDING even from a subflow when
mptcp_release_cb() is serving the previous one, the latter
will be delayed up to the next release_sock(msk).

Address the issue implementing a test/serve loop for such
event.

Additionally rename the push helper to __mptcp_push_pending()
to be more consistent with the existing code.

Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 67aaf7154dca..d2a2169e6d9e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1445,7 +1445,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
 	release_sock(ssk);
 }
 
-static void mptcp_push_pending(struct sock *sk, unsigned int flags)
+static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 {
 	struct sock *prev_ssk = NULL, *ssk = NULL;
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1697,14 +1697,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 wait_for_memory:
 		mptcp_set_nospace(sk);
-		mptcp_push_pending(sk, msg->msg_flags);
+		__mptcp_push_pending(sk, msg->msg_flags);
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret)
 			goto out;
 	}
 
 	if (copied)
-		mptcp_push_pending(sk, msg->msg_flags);
+		__mptcp_push_pending(sk, msg->msg_flags);
 
 out:
 	release_sock(sk);
@@ -2959,13 +2959,14 @@ static void mptcp_release_cb(struct sock *sk)
 {
 	unsigned long flags, nflags;
 
-	/* push_pending may touch wmem_reserved, do it before the later
-	 * cleanup
-	 */
-	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
-		__mptcp_clean_una(sk);
-	if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) {
-		/* mptcp_push_pending() acquires the subflow socket lock
+	for (;;) {
+		flags = 0;
+		if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
+			flags |= MPTCP_PUSH_PENDING;
+		if (!flags)
+			break;
+
+		/* the following actions acquire the subflow socket lock
 		 *
 		 * 1) can't be invoked in atomic scope
 		 * 2) must avoid ABBA deadlock with msk socket spinlock: the RX
@@ -2974,13 +2975,21 @@ static void mptcp_release_cb(struct sock *sk)
 		 */
 
 		spin_unlock_bh(&sk->sk_lock.slock);
-		mptcp_push_pending(sk, 0);
+		if (flags & MPTCP_PUSH_PENDING)
+			__mptcp_push_pending(sk, 0);
+
+		cond_resched();
 		spin_lock_bh(&sk->sk_lock.slock);
 	}
+
+	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+		__mptcp_clean_una(sk);
 	if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
 		__mptcp_error_report(sk);
 
-	/* clear any wmem reservation and errors */
+	/* push_pending may touch wmem_reserved, ensure we do the cleanup
+	 * later
+	 */
 	__mptcp_update_wmem(sk);
 	__mptcp_update_rmem(sk);
 

From 417789df4a03bc820b082bcc503f0d4c5e4704b9 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 4 Mar 2021 13:32:15 -0800
Subject: [PATCH 119/164] mptcp: fix missing wakeup

__mptcp_clean_una() can free write memory and should wake-up
user-space processes when needed.

When such function is invoked by the MPTCP receive path, the wakeup
is not needed, as the TCP stack will later trigger subflow_write_space
which will do the wakeup as needed.

Other __mptcp_clean_una() call sites need an additional wakeup check
Let's bundle the relevant code in a new helper and use it.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/165
Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions")
Tested-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d2a2169e6d9e..76958570ae7f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1061,6 +1061,12 @@ out:
 	}
 }
 
+static void __mptcp_clean_una_wakeup(struct sock *sk)
+{
+	__mptcp_clean_una(sk);
+	mptcp_write_space(sk);
+}
+
 static void mptcp_enter_memory_pressure(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -2270,7 +2276,7 @@ static void __mptcp_retrans(struct sock *sk)
 	struct sock *ssk;
 	int ret;
 
-	__mptcp_clean_una(sk);
+	__mptcp_clean_una_wakeup(sk);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag)
 		return;
@@ -2983,7 +2989,7 @@ static void mptcp_release_cb(struct sock *sk)
 	}
 
 	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
-		__mptcp_clean_una(sk);
+		__mptcp_clean_una_wakeup(sk);
 	if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
 		__mptcp_error_report(sk);
 

From 9238e900d6ec2e9b9ca3d8a9731acfd587fc577a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 4 Mar 2021 13:32:16 -0800
Subject: [PATCH 120/164] mptcp: free resources when the port number is
 mismatched

When the port number is mismatched with the announced ones, use
'goto dispose_child' to free the resources instead of using 'goto out'.

This patch also moves the port number checking code in
subflow_syn_recv_sock before mptcp_finish_join, otherwise subflow_drop_ctx
will fail in dispose_child.

Fixes: 5bc56388c74f ("mptcp: add port number check for MP_JOIN")
Reported-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 41695e26c374..3d47d670e665 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -687,11 +687,6 @@ create_child:
 			/* move the msk reference ownership to the subflow */
 			subflow_req->msk = NULL;
 			ctx->conn = (struct sock *)owner;
-			if (!mptcp_finish_join(child))
-				goto dispose_child;
-
-			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
-			tcp_rsk(req)->drop_req = true;
 
 			if (subflow_use_different_sport(owner, sk)) {
 				pr_debug("ack inet_sport=%d %d",
@@ -699,10 +694,16 @@ create_child:
 					 ntohs(inet_sk((struct sock *)owner)->inet_sport));
 				if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
 					SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
-					goto out;
+					goto dispose_child;
 				}
 				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
 			}
+
+			if (!mptcp_finish_join(child))
+				goto dispose_child;
+
+			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
+			tcp_rsk(req)->drop_req = true;
 		}
 	}
 

From 863a42b289c22df63db62b10fc2c2ffc237e2125 Mon Sep 17 00:00:00 2001
From: Hillf Danton <hdanton@sina.com>
Date: Thu, 4 Mar 2021 10:30:09 -0800
Subject: [PATCH 121/164] netdevsim: init u64 stats for 32bit hardware

Init the u64 stats in order to avoid the lockdep prints on the 32bit
hardware like

 INFO: trying to register non-static key.
 the code is fine but needs lockdep annotation.
 turning off the locking correctness validator.
 CPU: 0 PID: 4695 Comm: syz-executor.0 Not tainted 5.11.0-rc5-syzkaller #0
 Hardware name: ARM-Versatile Express
 Backtrace:
 [<826fc5b8>] (dump_backtrace) from [<826fc82c>] (show_stack+0x18/0x1c arch/arm/kernel/traps.c:252)
 [<826fc814>] (show_stack) from [<8270d1f8>] (__dump_stack lib/dump_stack.c:79 [inline])
 [<826fc814>] (show_stack) from [<8270d1f8>] (dump_stack+0xa8/0xc8 lib/dump_stack.c:120)
 [<8270d150>] (dump_stack) from [<802bf9c0>] (assign_lock_key kernel/locking/lockdep.c:935 [inline])
 [<8270d150>] (dump_stack) from [<802bf9c0>] (register_lock_class+0xabc/0xb68 kernel/locking/lockdep.c:1247)
 [<802bef04>] (register_lock_class) from [<802baa2c>] (__lock_acquire+0x84/0x32d4 kernel/locking/lockdep.c:4711)
 [<802ba9a8>] (__lock_acquire) from [<802be840>] (lock_acquire.part.0+0xf0/0x554 kernel/locking/lockdep.c:5442)
 [<802be750>] (lock_acquire.part.0) from [<802bed10>] (lock_acquire+0x6c/0x74 kernel/locking/lockdep.c:5415)
 [<802beca4>] (lock_acquire) from [<81560548>] (seqcount_lockdep_reader_access include/linux/seqlock.h:103 [inline])
 [<802beca4>] (lock_acquire) from [<81560548>] (__u64_stats_fetch_begin include/linux/u64_stats_sync.h:164 [inline])
 [<802beca4>] (lock_acquire) from [<81560548>] (u64_stats_fetch_begin include/linux/u64_stats_sync.h:175 [inline])
 [<802beca4>] (lock_acquire) from [<81560548>] (nsim_get_stats64+0xdc/0xf0 drivers/net/netdevsim/netdev.c:70)
 [<8156046c>] (nsim_get_stats64) from [<81e2efa0>] (dev_get_stats+0x44/0xd0 net/core/dev.c:10405)
 [<81e2ef5c>] (dev_get_stats) from [<81e53204>] (rtnl_fill_stats+0x38/0x120 net/core/rtnetlink.c:1211)
 [<81e531cc>] (rtnl_fill_stats) from [<81e59d58>] (rtnl_fill_ifinfo+0x6d4/0x148c net/core/rtnetlink.c:1783)
 [<81e59684>] (rtnl_fill_ifinfo) from [<81e5ceb4>] (rtmsg_ifinfo_build_skb+0x9c/0x108 net/core/rtnetlink.c:3798)
 [<81e5ce18>] (rtmsg_ifinfo_build_skb) from [<81e5d0ac>] (rtmsg_ifinfo_event net/core/rtnetlink.c:3830 [inline])
 [<81e5ce18>] (rtmsg_ifinfo_build_skb) from [<81e5d0ac>] (rtmsg_ifinfo_event net/core/rtnetlink.c:3821 [inline])
 [<81e5ce18>] (rtmsg_ifinfo_build_skb) from [<81e5d0ac>] (rtmsg_ifinfo+0x44/0x70 net/core/rtnetlink.c:3839)
 [<81e5d068>] (rtmsg_ifinfo) from [<81e45c2c>] (register_netdevice+0x664/0x68c net/core/dev.c:10103)
 [<81e455c8>] (register_netdevice) from [<815608bc>] (nsim_create+0xf8/0x124 drivers/net/netdevsim/netdev.c:317)
 [<815607c4>] (nsim_create) from [<81561184>] (__nsim_dev_port_add+0x108/0x188 drivers/net/netdevsim/dev.c:941)
 [<8156107c>] (__nsim_dev_port_add) from [<815620d8>] (nsim_dev_port_add_all drivers/net/netdevsim/dev.c:990 [inline])
 [<8156107c>] (__nsim_dev_port_add) from [<815620d8>] (nsim_dev_probe+0x5cc/0x750 drivers/net/netdevsim/dev.c:1119)
 [<81561b0c>] (nsim_dev_probe) from [<815661dc>] (nsim_bus_probe+0x10/0x14 drivers/net/netdevsim/bus.c:287)
 [<815661cc>] (nsim_bus_probe) from [<811724c0>] (really_probe+0x100/0x50c drivers/base/dd.c:554)
 [<811723c0>] (really_probe) from [<811729c4>] (driver_probe_device+0xf8/0x1c8 drivers/base/dd.c:740)
 [<811728cc>] (driver_probe_device) from [<81172fe4>] (__device_attach_driver+0x8c/0xf0 drivers/base/dd.c:846)
 [<81172f58>] (__device_attach_driver) from [<8116fee0>] (bus_for_each_drv+0x88/0xd8 drivers/base/bus.c:431)
 [<8116fe58>] (bus_for_each_drv) from [<81172c6c>] (__device_attach+0xdc/0x1d0 drivers/base/dd.c:914)
 [<81172b90>] (__device_attach) from [<8117305c>] (device_initial_probe+0x14/0x18 drivers/base/dd.c:961)
 [<81173048>] (device_initial_probe) from [<81171358>] (bus_probe_device+0x90/0x98 drivers/base/bus.c:491)
 [<811712c8>] (bus_probe_device) from [<8116e77c>] (device_add+0x320/0x824 drivers/base/core.c:3109)
 [<8116e45c>] (device_add) from [<8116ec9c>] (device_register+0x1c/0x20 drivers/base/core.c:3182)
 [<8116ec80>] (device_register) from [<81566710>] (nsim_bus_dev_new drivers/net/netdevsim/bus.c:336 [inline])
 [<8116ec80>] (device_register) from [<81566710>] (new_device_store+0x178/0x208 drivers/net/netdevsim/bus.c:215)
 [<81566598>] (new_device_store) from [<8116fcb4>] (bus_attr_store+0x2c/0x38 drivers/base/bus.c:122)
 [<8116fc88>] (bus_attr_store) from [<805b4b8c>] (sysfs_kf_write+0x48/0x54 fs/sysfs/file.c:139)
 [<805b4b44>] (sysfs_kf_write) from [<805b3c90>] (kernfs_fop_write_iter+0x128/0x1ec fs/kernfs/file.c:296)
 [<805b3b68>] (kernfs_fop_write_iter) from [<804d22fc>] (call_write_iter include/linux/fs.h:1901 [inline])
 [<805b3b68>] (kernfs_fop_write_iter) from [<804d22fc>] (new_sync_write fs/read_write.c:518 [inline])
 [<805b3b68>] (kernfs_fop_write_iter) from [<804d22fc>] (vfs_write+0x3dc/0x57c fs/read_write.c:605)
 [<804d1f20>] (vfs_write) from [<804d2604>] (ksys_write+0x68/0xec fs/read_write.c:658)
 [<804d259c>] (ksys_write) from [<804d2698>] (__do_sys_write fs/read_write.c:670 [inline])
 [<804d259c>] (ksys_write) from [<804d2698>] (sys_write+0x10/0x14 fs/read_write.c:667)
 [<804d2688>] (sys_write) from [<80200060>] (ret_fast_syscall+0x0/0x2c arch/arm/mm/proc-v7.S:64)

Fixes: 83c9e13aa39a ("netdevsim: add software driver for testing offloads")
Reported-by: syzbot+e74a6857f2d0efe3ad81@syzkaller.appspotmail.com
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/netdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index aec92440eef1..659d3dceb687 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -294,6 +294,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 	dev_net_set(dev, nsim_dev_net(nsim_dev));
 	ns = netdev_priv(dev);
 	ns->netdev = dev;
+	u64_stats_init(&ns->syncp);
 	ns->nsim_dev = nsim_dev;
 	ns->nsim_dev_port = nsim_dev_port;
 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;

From 67eb211487f0c993d9f402d1c196ef159fd6a3b5 Mon Sep 17 00:00:00 2001
From: Jiri Wiesner <jwiesner@suse.com>
Date: Thu, 4 Mar 2021 17:18:28 +0100
Subject: [PATCH 122/164] ibmvnic: always store valid MAC address

The last change to ibmvnic_set_mac(), 8fc3672a8ad3, meant to prevent
users from setting an invalid MAC address on an ibmvnic interface
that has not been brought up yet. The change also prevented the
requested MAC address from being stored by the adapter object for an
ibmvnic interface when the state of the ibmvnic interface is
VNIC_PROBED - that is after probing has finished but before the
ibmvnic interface is brought up. The MAC address stored by the
adapter object is used and sent to the hypervisor for checking when
an ibmvnic interface is brought up.

The ibmvnic driver ignoring the requested MAC address when in
VNIC_PROBED state caused LACP bonds (bonds in 802.3ad mode) with more
than one slave to malfunction. The bonding code must be able to
change the MAC address of its slaves before they are brought up
during enslaving. The inability of kernels with 8fc3672a8ad3 to set
the MAC addresses of bonding slaves is observable in the output of
"ip address show". The MAC addresses of the slaves are the same as
the MAC address of the bond on a working system whereas the slaves
retain their original MAC addresses on a system with a malfunctioning
LACP bond.

Fixes: 8fc3672a8ad3 ("ibmvnic: fix ibmvnic_set_mac")
Signed-off-by: Jiri Wiesner <jwiesner@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 3bad762083c5..b6102ccf9b90 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1906,10 +1906,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	if (adapter->state != VNIC_PROBED) {
-		ether_addr_copy(adapter->mac_addr, addr->sa_data);
+	ether_addr_copy(adapter->mac_addr, addr->sa_data);
+	if (adapter->state != VNIC_PROBED)
 		rc = __ibmvnic_set_mac(netdev, addr->sa_data);
-	}
 
 	return rc;
 }

From ad5d07f4a9cd671233ae20983848874731102c08 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Thu, 4 Mar 2021 16:29:51 -0500
Subject: [PATCH 123/164] cipso,calipso: resolve a number of problems with the
 DOI refcounts

The current CIPSO and CALIPSO refcounting scheme for the DOI
definitions is a bit flawed in that we:

1. Don't correctly match gets/puts in netlbl_cipsov4_list().
2. Decrement the refcount on each attempt to remove the DOI from the
   DOI list, only removing it from the list once the refcount drops
   to zero.

This patch fixes these problems by adding the missing "puts" to
netlbl_cipsov4_list() and introduces a more conventional, i.e.
not-buggy, refcounting mechanism to the DOI definitions.  Upon the
addition of a DOI to the DOI list, it is initialized with a refcount
of one, removing a DOI from the list removes it from the list and
drops the refcount by one; "gets" and "puts" behave as expected with
respect to refcounts, increasing and decreasing the DOI's refcount by
one.

Fixes: b1edeb102397 ("netlabel: Replace protocol/NetLabel linking with refrerence counts")
Fixes: d7cce01504a0 ("netlabel: Add support for removing a CALIPSO DOI.")
Reported-by: syzbot+9ec037722d2603a9f52e@syzkaller.appspotmail.com
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c            | 11 +----------
 net/ipv6/calipso.c               | 14 +++++---------
 net/netlabel/netlabel_cipso_v4.c |  3 +++
 3 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 471d33a0d095..be09c7669a79 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -519,16 +519,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
 		ret_val = -ENOENT;
 		goto doi_remove_return;
 	}
-	if (!refcount_dec_and_test(&doi_def->refcount)) {
-		spin_unlock(&cipso_v4_doi_list_lock);
-		ret_val = -EBUSY;
-		goto doi_remove_return;
-	}
 	list_del_rcu(&doi_def->list);
 	spin_unlock(&cipso_v4_doi_list_lock);
 
-	cipso_v4_cache_invalidate();
-	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+	cipso_v4_doi_putdef(doi_def);
 	ret_val = 0;
 
 doi_remove_return:
@@ -585,9 +579,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
 
 	if (!refcount_dec_and_test(&doi_def->refcount))
 		return;
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_del_rcu(&doi_def->list);
-	spin_unlock(&cipso_v4_doi_list_lock);
 
 	cipso_v4_cache_invalidate();
 	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 51184a70ac7e..1578ed9e97d8 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -83,6 +83,9 @@ struct calipso_map_cache_entry {
 
 static struct calipso_map_cache_bkt *calipso_cache;
 
+static void calipso_cache_invalidate(void);
+static void calipso_doi_putdef(struct calipso_doi *doi_def);
+
 /* Label Mapping Cache Functions
  */
 
@@ -444,15 +447,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info)
 		ret_val = -ENOENT;
 		goto doi_remove_return;
 	}
-	if (!refcount_dec_and_test(&doi_def->refcount)) {
-		spin_unlock(&calipso_doi_list_lock);
-		ret_val = -EBUSY;
-		goto doi_remove_return;
-	}
 	list_del_rcu(&doi_def->list);
 	spin_unlock(&calipso_doi_list_lock);
 
-	call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+	calipso_doi_putdef(doi_def);
 	ret_val = 0;
 
 doi_remove_return:
@@ -508,10 +506,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def)
 
 	if (!refcount_dec_and_test(&doi_def->refcount))
 		return;
-	spin_lock(&calipso_doi_list_lock);
-	list_del_rcu(&doi_def->list);
-	spin_unlock(&calipso_doi_list_lock);
 
+	calipso_cache_invalidate();
 	call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
 }
 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 726dda95934c..4f50a64315cf 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -575,6 +575,7 @@ list_start:
 
 		break;
 	}
+	cipso_v4_doi_putdef(doi_def);
 	rcu_read_unlock();
 
 	genlmsg_end(ans_skb, data);
@@ -583,12 +584,14 @@ list_start:
 list_retry:
 	/* XXX - this limit is a guesstimate */
 	if (nlsze_mult < 4) {
+		cipso_v4_doi_putdef(doi_def);
 		rcu_read_unlock();
 		kfree_skb(ans_skb);
 		nlsze_mult *= 2;
 		goto list_start;
 	}
 list_failure_lock:
+	cipso_v4_doi_putdef(doi_def);
 	rcu_read_unlock();
 list_failure:
 	kfree_skb(ans_skb);

From 39491867ace594b4912c35f576864d204beed2b3 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Thu, 4 Mar 2021 18:56:46 -0800
Subject: [PATCH 124/164] bpf: Explicitly zero-extend R0 after 32-bit cmpxchg

As pointed out by Ilya and explained in the new comment, there's a
discrepancy between x86 and BPF CMPXCHG semantics: BPF always loads
the value from memory into r0, while x86 only does so when r0 and the
value in memory are different. The same issue affects s390.

At first this might sound like pure semantics, but it makes a real
difference when the comparison is 32-bit, since the load will
zero-extend r0/rax.

The fix is to explicitly zero-extend rax after doing such a
CMPXCHG. Since this problem affects multiple archs, this is done in
the verifier by patching in a BPF_ZEXT_REG instruction after every
32-bit cmpxchg. Any archs that don't need such manual zero-extension
can do a look-ahead with insn_is_zext to skip the unnecessary mov.

Note this still goes on top of Ilya's patch:

https://lore.kernel.org/bpf/20210301154019.129110-1-iii@linux.ibm.com/T/#u

Differences v5->v6[1]:
 - Moved is_cmpxchg_insn and ensured it can be safely re-used. Also renamed it
   and removed 'inline' to match the style of the is_*_function helpers.
 - Fixed up comments in verifier test (thanks for the careful review, Martin!)

Differences v4->v5[1]:
 - Moved the logic entirely into opt_subreg_zext_lo32_rnd_hi32, thanks to Martin
   for suggesting this.

Differences v3->v4[1]:
 - Moved the optimization against pointless zext into the correct place:
   opt_subreg_zext_lo32_rnd_hi32 is called _after_ fixup_bpf_calls.

Differences v2->v3[1]:
 - Moved patching into fixup_bpf_calls (patch incoming to rename this function)
 - Added extra commentary on bpf_jit_needs_zext
 - Added check to avoid adding a pointless zext(r0) if there's already one there.

Difference v1->v2[1]: Now solved centrally in the verifier instead of
  specifically for the x86 JIT. Thanks to Ilya and Daniel for the suggestions!

[1] v5: https://lore.kernel.org/bpf/CA+i-1C3ytZz6FjcPmUg5s4L51pMQDxWcZNvM86w4RHZ_o2khwg@mail.gmail.com/T/#t
    v4: https://lore.kernel.org/bpf/CA+i-1C3ytZz6FjcPmUg5s4L51pMQDxWcZNvM86w4RHZ_o2khwg@mail.gmail.com/T/#t
    v3: https://lore.kernel.org/bpf/08669818-c99d-0d30-e1db-53160c063611@iogearbox.net/T/#t
    v2: https://lore.kernel.org/bpf/08669818-c99d-0d30-e1db-53160c063611@iogearbox.net/T/#t
    v1: https://lore.kernel.org/bpf/d7ebaefb-bfd6-a441-3ff2-2fdfe699b1d2@iogearbox.net/T/#t

Reported-by: Ilya Leoshkevich <iii@linux.ibm.com>
Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg")
Signed-off-by: Brendan Jackman <jackmanb@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/core.c                             |  4 +++
 kernel/bpf/verifier.c                         | 19 +++++++++++++-
 .../selftests/bpf/verifier/atomic_cmpxchg.c   | 25 +++++++++++++++++++
 .../selftests/bpf/verifier/atomic_or.c        | 25 +++++++++++++++++++
 4 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index aa1e64196d8d..3a283bf97f2f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2344,6 +2344,10 @@ bool __weak bpf_helper_changes_pkt_data(void *func)
 /* Return TRUE if the JIT backend wants verifier to enable sub-register usage
  * analysis code and wants explicit zero extension inserted by verifier.
  * Otherwise, return FALSE.
+ *
+ * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
+ * you don't override this. JITs that don't want these extra insns can detect
+ * them using insn_is_zext.
  */
 bool __weak bpf_jit_needs_zext(void)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb3eaab934f3..c56e3fcb5f1a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -504,6 +504,13 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
 }
 
+static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_STX &&
+	       BPF_MODE(insn->code) == BPF_ATOMIC &&
+	       insn->imm == BPF_CMPXCHG;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
@@ -11067,7 +11074,17 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 			goto apply_patch_buffer;
 		}
 
-		if (!bpf_jit_needs_zext())
+		/* Add in an zero-extend instruction if a) the JIT has requested
+		 * it or b) it's a CMPXCHG.
+		 *
+		 * The latter is because: BPF_CMPXCHG always loads a value into
+		 * R0, therefore always zero-extends. However some archs'
+		 * equivalent instruction only does this load when the
+		 * comparison is successful. This detail of CMPXCHG is
+		 * orthogonal to the general zero-extension behaviour of the
+		 * CPU, so it's treated independently of bpf_jit_needs_zext.
+		 */
+		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
 			continue;
 
 		if (WARN_ON(load_reg == -1)) {
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index 2efd8bcf57a1..6e52dfc64415 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -94,3 +94,28 @@
 	.result = REJECT,
 	.errstr = "invalid read from stack",
 },
+{
+	"BPF_W cmpxchg should zero top 32 bits",
+	.insns = {
+		/* r0 = U64_MAX; */
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+		/* u64 val = r0; */
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+		/* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */
+		BPF_MOV32_IMM(BPF_REG_1, 1),
+		BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+		/* r1 = 0x00000000FFFFFFFFull; */
+		BPF_MOV64_IMM(BPF_REG_1, 1),
+		BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+		/* if (r0 != r1) exit(1); */
+		BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2),
+		BPF_MOV32_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		/* exit(0); */
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c
index 70f982e1f9f0..9d0716ac5080 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_or.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_or.c
@@ -75,3 +75,28 @@
 	},
 	.result = ACCEPT,
 },
+{
+	"BPF_W atomic_fetch_or should zero top 32 bits",
+	.insns = {
+		/* r1 = U64_MAX; */
+		BPF_MOV64_IMM(BPF_REG_1, 0),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+		/* u64 val = r1; */
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+		/* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */
+		BPF_MOV32_IMM(BPF_REG_1, 2),
+		BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+		/* r2 = 0x00000000FFFFFFFF; */
+		BPF_MOV64_IMM(BPF_REG_2, 1),
+		BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1),
+		/* if (r2 != r1) exit(1); */
+		BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2),
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+		BPF_EXIT_INSN(),
+		/* exit(0); */
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+},

From 00ff801bb8ce6711e919af4530b6ffa14a22390a Mon Sep 17 00:00:00 2001
From: "Kevin(Yudong) Yang" <yyd@google.com>
Date: Wed, 3 Mar 2021 09:43:54 -0500
Subject: [PATCH 125/164] net/mlx4_en: update moderation when config reset

This patch fixes a bug that the moderation config will not be
applied when calling mlx4_en_reset_config. For example, when
turning on rx timestamping, mlx4_en_reset_config() will be called,
causing the NIC to forget previous moderation config.

This fix is in phase with a previous fix:
commit 79c54b6bbf06 ("net/mlx4_en: Fix TX moderation info loss
after set_ringparam is called")

Tested: Before this patch, on a host with NIC using mlx4, run
netserver and stream TCP to the host at full utilization.
$ sar -I SUM 1
                 INTR    intr/s
14:03:56          sum  48758.00

After rx hwtstamp is enabled:
$ sar -I SUM 1
14:10:38          sum 317771.00
We see the moderation is not working properly and issued 7x more
interrupts.

After the patch, and turned on rx hwtstamp, the rate of interrupts
is as expected:
$ sar -I SUM 1
14:52:11          sum  49332.00

Fixes: 79c54b6bbf06 ("net/mlx4_en: Fix TX moderation info loss after set_ringparam is called")
Signed-off-by: Kevin(Yudong) Yang <yyd@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
CC: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  | 2 ++
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 23849f2b9c25..1434df66fcf2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -47,7 +47,7 @@
 #define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
 #define EN_ETHTOOL_WORD_MASK  cpu_to_be32(0xffffffff)
 
-static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
+int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
 {
 	int i, t;
 	int err = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 51b9700fce83..5d0c9c62382d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3554,6 +3554,8 @@ int mlx4_en_reset_config(struct net_device *dev,
 			en_err(priv, "Failed starting port\n");
 	}
 
+	if (!err)
+		err = mlx4_en_moderation_update(priv);
 out:
 	mutex_unlock(&mdev->state_lock);
 	kfree(tmp);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index e8ed23190de0..f3d1a20201ef 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -775,6 +775,7 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev);
 #define DEV_FEATURE_CHANGED(dev, new_features, feature) \
 	((dev->features & feature) ^ (new_features & feature))
 
+int mlx4_en_moderation_update(struct mlx4_en_priv *priv);
 int mlx4_en_reset_config(struct net_device *dev,
 			 struct hwtstamp_config ts_config,
 			 netdev_features_t new_features);

From 38c26ff3048af50eee3fcd591921357ee5bfd9ee Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Thu, 4 Mar 2021 18:06:48 -0800
Subject: [PATCH 126/164] net: tehuti: fix error return code in bdx_probe()

When bdx_read_mac() fails, no error return code of bdx_probe()
is assigned.
To fix this bug, err is assigned with -EFAULT as error return code.

Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/tehuti/tehuti.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index b8f4f419173f..d054c6e83b1c 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -2044,6 +2044,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		/*bdx_hw_reset(priv); */
 		if (bdx_read_mac(priv)) {
 			pr_err("load MAC address failed\n");
+			err = -EFAULT;
 			goto err_out_iomap;
 		}
 		SET_NETDEV_DEV(ndev, &pdev->dev);

From 6650d31f21b8a0043613ae0a4a2e42e49dc20b2d Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Thu, 4 Mar 2021 19:10:10 -0800
Subject: [PATCH 127/164] net: intel: iavf: fix error return code of
 iavf_init_get_resources()

When iavf_process_config() fails, no error return code of
iavf_init_get_resources() is assigned.
To fix this bug, err is assigned with the return value of
iavf_process_config(), and then err is checked.

Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 0a867d64d467..dc5b3c06d1e0 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1776,7 +1776,8 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
 		goto err_alloc;
 	}
 
-	if (iavf_process_config(adapter))
+	err = iavf_process_config(adapter);
+	if (err)
 		goto err_alloc;
 	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 

From 9a7b3950c7e15968e23d83be215e95ccc7c92a53 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 5 Mar 2021 13:49:30 +0800
Subject: [PATCH 128/164] net: stmmac: Fix VLAN filter delete timeout issue in
 Intel mGBE SGMII

For Intel mGbE controller, MAC VLAN filter delete operation will time-out
if serdes power-down sequence happened first during driver remove() with
below message.

[82294.764958] intel-eth-pci 0000:00:1e.4 eth2: stmmac_dvr_remove: removing driver
[82294.778677] intel-eth-pci 0000:00:1e.4 eth2: Timeout accessing MAC_VLAN_Tag_Filter
[82294.779997] intel-eth-pci 0000:00:1e.4 eth2: failed to kill vid 0081/0
[82294.947053] intel-eth-pci 0000:00:1d.2 eth1: stmmac_dvr_remove: removing driver
[82295.002091] intel-eth-pci 0000:00:1d.1 eth0: stmmac_dvr_remove: removing driver

Therefore, we delay the serdes power-down to be after unregister_netdev()
which triggers the VLAN filter delete.

Fixes: b9663b7ca6ff ("net: stmmac: Enable SERDES power up/down sequence")
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0eba44e9c1f8..208cae344ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5249,13 +5249,16 @@ int stmmac_dvr_remove(struct device *dev)
 	netdev_info(priv->dev, "%s: removing driver", __func__);
 
 	stmmac_stop_all_dma(priv);
-
-	if (priv->plat->serdes_powerdown)
-		priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
-
 	stmmac_mac_set(priv, priv->ioaddr, false);
 	netif_carrier_off(ndev);
 	unregister_netdev(ndev);
+
+	/* Serdes power down needs to happen after VLAN filter
+	 * is deleted that is triggered by unregister_netdev().
+	 */
+	if (priv->plat->serdes_powerdown)
+		priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
+
 #ifdef CONFIG_DEBUG_FS
 	stmmac_exit_fs(ndev);
 #endif

From 8eb37ab7cc045ec6305a6a1a9c32374695a1a977 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@intel.com>
Date: Fri, 5 Mar 2021 14:03:42 +0800
Subject: [PATCH 129/164] stmmac: intel: Fixes clock registration error seen
 for multiple interfaces

Issue seen when enumerating multiple Intel mGbE interfaces in EHL.

[    6.898141] intel-eth-pci 0000:00:1d.2: enabling device (0000 -> 0002)
[    6.900971] intel-eth-pci 0000:00:1d.2: Fail to register stmmac-clk
[    6.906434] intel-eth-pci 0000:00:1d.2: User ID: 0x51, Synopsys ID: 0x52

We fix it by making the clock name to be unique following the format
of stmmac-pci_name(pci_dev) so that we can differentiate the clock for
these Intel mGbE interfaces in EHL platform as follow:

  /sys/kernel/debug/clk/stmmac-0000:00:1d.1
  /sys/kernel/debug/clk/stmmac-0000:00:1d.2
  /sys/kernel/debug/clk/stmmac-0000:00:1e.4

Fixes: 58da0cfa6cf1 ("net: stmmac: create dwmac-intel.c to contain all Intel platform")
Signed-off-by: Wong Vee Khee <vee.khee.wong@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Co-developed-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index f2896872a86c..0b64f7710d17 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -233,6 +233,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
 static int intel_mgbe_common_data(struct pci_dev *pdev,
 				  struct plat_stmmacenet_data *plat)
 {
+	char clk_name[20];
 	int ret;
 	int i;
 
@@ -301,8 +302,10 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->eee_usecs_rate = plat->clk_ptp_rate;
 
 	/* Set system clock */
+	sprintf(clk_name, "%s-%s", "stmmac", pci_name(pdev));
+
 	plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev,
-						   "stmmac-clk", NULL, 0,
+						   clk_name, NULL, 0,
 						   plat->clk_ptp_rate);
 
 	if (IS_ERR(plat->stmmac_clk)) {

From e233febda6ebab750e30662a7cc9b9efad127685 Mon Sep 17 00:00:00 2001
From: Sergey Nazarov <s-nazarov@yandex.ru>
Date: Fri, 5 Mar 2021 11:05:54 +0300
Subject: [PATCH 130/164] CIPSO: Fix unaligned memory access in
 cipso_v4_gentag_hdr

We need to use put_unaligned when writing 32-bit DOI value
in cipso_v4_gentag_hdr to avoid unaligned memory access.

v2: unneeded type cast removed as Ondrej Mosnacek suggested.

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index be09c7669a79..bfaf327e9d12 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1153,7 +1153,7 @@ static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
 {
 	buf[0] = IPOPT_CIPSO;
 	buf[1] = CIPSO_V4_HDR_LEN + len;
-	*(__be32 *)&buf[2] = htonl(doi_def->doi);
+	put_unaligned_be32(doi_def->doi, &buf[2]);
 }
 
 /**

From 69cdb7947adb816fc9325b4ec02a6dddd5070b82 Mon Sep 17 00:00:00 2001
From: Junlin Yang <yangjunlin@yulong.com>
Date: Fri, 5 Mar 2021 16:48:39 +0800
Subject: [PATCH 131/164] ibmvnic: remove excessive irqsave

ibmvnic_remove locks multiple spinlocks while disabling interrupts:
spin_lock_irqsave(&adapter->state_lock, flags);
spin_lock_irqsave(&adapter->rwi_lock, flags);

As reported by coccinelle, the second _irqsave() overwrites the value
saved in 'flags' by the first _irqsave(),   therefore when the second
_irqrestore() comes,the value in 'flags' is not valid,the value saved
by the first _irqsave() has been lost.
This likely leads to IRQs remaining disabled. So remove the second
_irqsave():
spin_lock_irqsave(&adapter->state_lock, flags);
spin_lock(&adapter->rwi_lock);

Generated by: ./scripts/coccinelle/locks/flags.cocci
./drivers/net/ethernet/ibm/ibmvnic.c:5413:1-18:
ERROR: nested lock+irqsave that reuses flags from line 5404.

Fixes: 4a41c421f367 ("ibmvnic: serialize access to work queue on remove")
Signed-off-by: Junlin Yang <yangjunlin@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b6102ccf9b90..161fa95e8768 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -5408,9 +5408,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
 	 * after setting state, so __ibmvnic_reset() which is called
 	 * from the flush_work() below, can make progress.
 	 */
-	spin_lock_irqsave(&adapter->rwi_lock, flags);
+	spin_lock(&adapter->rwi_lock);
 	adapter->state = VNIC_REMOVING;
-	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+	spin_unlock(&adapter->rwi_lock);
 
 	spin_unlock_irqrestore(&adapter->state_lock, flags);
 

From 0a7e0c3b5702a6a76cf7e5b8cc10a73e51dc221e Mon Sep 17 00:00:00 2001
From: Xu Wang <vulab@iscas.ac.cn>
Date: Fri, 5 Mar 2021 09:33:06 +0000
Subject: [PATCH 132/164] selftest/net/ipsec.c: Remove unneeded semicolon

fix semicolon.cocci warning:
tools/testing/selftests/net/ipsec.c:1788:2-3: Unneeded semicolon

Signed-off-by: Xu Wang <vulab@iscas.ac.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/ipsec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 17ced7d6ce25..f23438d512c5 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -1785,7 +1785,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
 		break;
 	default:
 		printk("got unknown msg type %d", msg->type);
-	};
+	}
 }
 
 static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)

From abbf9a0ef8848dca58c5b97750c1c59bbee45637 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 5 Mar 2021 17:34:41 +0800
Subject: [PATCH 133/164] r8169: fix r8168fp_adjust_ocp_cmd function

The (0xBAF70000 & 0x00FFF000) << 6 should be (0xf70 << 18).

Fixes: 561535b0f239 ("r8169: fix OCP access on RTL8117")
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Acked-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f704da3f214c..7aad0ba53372 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -767,7 +767,7 @@ static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int typ
 	if (type == ERIAR_OOB &&
 	    (tp->mac_version == RTL_GIGA_MAC_VER_52 ||
 	     tp->mac_version == RTL_GIGA_MAC_VER_53))
-		*cmd |= 0x7f0 << 18;
+		*cmd |= 0xf70 << 18;
 }
 
 DECLARE_RTL_COND(rtl_eriar_cond)

From 85554bcd123e307282631defe6bf6fac5031cf60 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Fri, 5 Mar 2021 19:26:22 +0300
Subject: [PATCH 134/164] net/hamradio/6pack: remove redundant check in
 sp_encaps()

"len > sp->mtu" checked twice in a row in sp_encaps().
Remove the second check.

Signed-off-by: Denis Efremov <efremov@linux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/6pack.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 71d6629e65c9..9f5b5614a150 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -171,11 +171,6 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
 		goto out_drop;
 	}
 
-	if (len > sp->mtu) {	/* sp->mtu = AX25_MTU = max. PACLEN = 256 */
-		msg = "oversized transmit packet!";
-		goto out_drop;
-	}
-
 	if (p[0] > 5) {
 		msg = "invalid KISS command";
 		goto out_drop;

From 155b23e6e53475ca3b8c2a946299b4d4dd6a5a1e Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Fri, 5 Mar 2021 20:02:12 +0300
Subject: [PATCH 135/164] sun/niu: fix wrong RXMAC_BC_FRM_CNT_COUNT count

RXMAC_BC_FRM_CNT_COUNT added to mp->rx_bcasts twice in a row
in niu_xmac_interrupt(). Remove the second addition.

Signed-off-by: Denis Efremov <efremov@linux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/niu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 68695d4afacd..707ccdd03b19 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -3931,8 +3931,6 @@ static void niu_xmac_interrupt(struct niu *np)
 		mp->rx_mcasts += RXMAC_MC_FRM_CNT_COUNT;
 	if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP)
 		mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT;
-	if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP)
-		mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT;
 	if (val & XRXMAC_STATUS_RXHIST1_CNT_EXP)
 		mp->rx_hist_cnt1 += RXMAC_HIST_CNT1_COUNT;
 	if (val & XRXMAC_STATUS_RXHIST2_CNT_EXP)

From d8861bab48b6c1fc3cdbcab8ff9d1eaea43afe7f Mon Sep 17 00:00:00 2001
From: Michael Braun <michael-dev@fami-braun.de>
Date: Thu, 4 Mar 2021 20:52:52 +0100
Subject: [PATCH 136/164] gianfar: fix jumbo packets+napi+rx overrun crash

When using jumbo packets and overrunning rx queue with napi enabled,
the following sequence is observed in gfar_add_rx_frag:

   | lstatus                              |       | skb                   |
t  | lstatus,  size, flags                | first | len, data_len, *ptr   |
---+--------------------------------------+-------+-----------------------+
13 | 18002348, 9032, INTERRUPT LAST       | 0     | 9600, 8000,  f554c12e |
12 | 10000640, 1600, INTERRUPT            | 0     | 8000, 6400,  f554c12e |
11 | 10000640, 1600, INTERRUPT            | 0     | 6400, 4800,  f554c12e |
10 | 10000640, 1600, INTERRUPT            | 0     | 4800, 3200,  f554c12e |
09 | 10000640, 1600, INTERRUPT            | 0     | 3200, 1600,  f554c12e |
08 | 14000640, 1600, INTERRUPT FIRST      | 0     | 1600, 0,     f554c12e |
07 | 14000640, 1600, INTERRUPT FIRST      | 1     | 0,    0,     f554c12e |
06 | 1c000080, 128,  INTERRUPT LAST FIRST | 1     | 0,    0,     abf3bd6e |
05 | 18002348, 9032, INTERRUPT LAST       | 0     | 8000, 6400,  c5a57780 |
04 | 10000640, 1600, INTERRUPT            | 0     | 6400, 4800,  c5a57780 |
03 | 10000640, 1600, INTERRUPT            | 0     | 4800, 3200,  c5a57780 |
02 | 10000640, 1600, INTERRUPT            | 0     | 3200, 1600,  c5a57780 |
01 | 10000640, 1600, INTERRUPT            | 0     | 1600, 0,     c5a57780 |
00 | 14000640, 1600, INTERRUPT FIRST      | 1     | 0,    0,     c5a57780 |

So at t=7 a new packets is started but not finished, probably due to rx
overrun - but rx overrun is not indicated in the flags. Instead a new
packets starts at t=8. This results in skb->len to exceed size for the LAST
fragment at t=13 and thus a negative fragment size added to the skb.

This then crashes:

kernel BUG at include/linux/skbuff.h:2277!
Oops: Exception in kernel mode, sig: 5 [#1]
...
NIP [c04689f4] skb_pull+0x2c/0x48
LR [c03f62ac] gfar_clean_rx_ring+0x2e4/0x844
Call Trace:
[ec4bfd38] [c06a84c4] _raw_spin_unlock_irqrestore+0x60/0x7c (unreliable)
[ec4bfda8] [c03f6a44] gfar_poll_rx_sq+0x48/0xe4
[ec4bfdc8] [c048d504] __napi_poll+0x54/0x26c
[ec4bfdf8] [c048d908] net_rx_action+0x138/0x2c0
[ec4bfe68] [c06a8f34] __do_softirq+0x3a4/0x4fc
[ec4bfed8] [c0040150] run_ksoftirqd+0x58/0x70
[ec4bfee8] [c0066ecc] smpboot_thread_fn+0x184/0x1cc
[ec4bff08] [c0062718] kthread+0x140/0x144
[ec4bff38] [c0012350] ret_from_kernel_thread+0x14/0x1c

This patch fixes this by checking for computed LAST fragment size, so a
negative sized fragment is never added.
In order to prevent the newer rx frame from getting corrupted, the FIRST
flag is checked to discard the incomplete older frame.

Signed-off-by: Michael Braun <michael-dev@fami-braun.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 541de32ea662..1cf8ef717453 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2390,6 +2390,10 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
 		if (lstatus & BD_LFLAG(RXBD_LAST))
 			size -= skb->len;
 
+		WARN(size < 0, "gianfar: rx fragment size underflow");
+		if (size < 0)
+			return false;
+
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 				rxb->page_offset + RXBUF_ALIGNMENT,
 				size, GFAR_RXB_TRUESIZE);
@@ -2552,6 +2556,17 @@ static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue,
 		if (lstatus & BD_LFLAG(RXBD_EMPTY))
 			break;
 
+		/* lost RXBD_LAST descriptor due to overrun */
+		if (skb &&
+		    (lstatus & BD_LFLAG(RXBD_FIRST))) {
+			/* discard faulty buffer */
+			dev_kfree_skb(skb);
+			skb = NULL;
+			rx_queue->stats.rx_dropped++;
+
+			/* can continue normally */
+		}
+
 		/* order rx buffer descriptor reads */
 		rmb();
 

From 3e21a10fdea3c2e4e4d1b72cb9d720256461af40 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Fri, 5 Mar 2021 16:24:45 -0600
Subject: [PATCH 137/164] lan743x: trim all 4 bytes of the FCS; not just 2

Trim all 4 bytes of the received FCS; not just 2 of them. Leaving 2
bytes of the FCS on the frame breaks DSA tailing tag drivers.

Fixes: a8db76d40e4d ("lan743x: boost performance on cpu archs w/o dma cache snooping")
Signed-off-by: George McCollister <george.mccollister@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index dbdfabff3b00..1c3e204d727c 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2040,7 +2040,7 @@ lan743x_rx_trim_skb(struct sk_buff *skb, int frame_length)
 		dev_kfree_skb_irq(skb);
 		return NULL;
 	}
-	frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 2);
+	frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4);
 	if (skb->len > frame_length) {
 		skb->tail -= skb->len - frame_length;
 		skb->len = frame_length;

From a4dcfbc4ee2218abd567d81d795082d8d4afcdf6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 5 Mar 2021 14:17:29 -0800
Subject: [PATCH 138/164] ethernet: alx: fix order of calls on resume

netif_device_attach() will unpause the queues so we can't call
it before __alx_open(). This went undetected until
commit b0999223f224 ("alx: add ability to allocate and free
alx_napi structures") but now if stack tries to xmit immediately
on resume before __alx_open() we'll crash on the NAPI being null:

 BUG: kernel NULL pointer dereference, address: 0000000000000198
 CPU: 0 PID: 12 Comm: ksoftirqd/0 Tainted: G           OE 5.10.0-3-amd64 #1 Debian 5.10.13-1
 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77-D3H, BIOS F15 11/14/2013
 RIP: 0010:alx_start_xmit+0x34/0x650 [alx]
 Code: 41 56 41 55 41 54 55 53 48 83 ec 20 0f b7 57 7c 8b 8e b0
0b 00 00 39 ca 72 06 89 d0 31 d2 f7 f1 89 d2 48 8b 84 df
 RSP: 0018:ffffb09240083d28 EFLAGS: 00010297
 RAX: 0000000000000000 RBX: ffffa04d80ae7800 RCX: 0000000000000004
 RDX: 0000000000000000 RSI: ffffa04d80afa000 RDI: ffffa04e92e92a00
 RBP: 0000000000000042 R08: 0000000000000100 R09: ffffa04ea3146700
 R10: 0000000000000014 R11: 0000000000000000 R12: ffffa04e92e92100
 R13: 0000000000000001 R14: ffffa04e92e92a00 R15: ffffa04e92e92a00
 FS:  0000000000000000(0000) GS:ffffa0508f600000(0000) knlGS:0000000000000000
 i915 0000:00:02.0: vblank wait timed out on crtc 0
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000198 CR3: 000000004460a001 CR4: 00000000001706f0
 Call Trace:
  dev_hard_start_xmit+0xc7/0x1e0
  sch_direct_xmit+0x10f/0x310

Cc: <stable@vger.kernel.org> # 4.9+
Fixes: bc2bebe8de8e ("alx: remove WoL support")
Reported-by: Zbynek Michl <zbynek.michl@gmail.com>
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=983595
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Zbynek Michl <zbynek.michl@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 9b7f1af5f574..9e02f8864593 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1894,13 +1894,16 @@ static int alx_resume(struct device *dev)
 
 	if (!netif_running(alx->dev))
 		return 0;
-	netif_device_attach(alx->dev);
 
 	rtnl_lock();
 	err = __alx_open(alx, true);
 	rtnl_unlock();
+	if (err)
+		return err;
 
-	return err;
+	netif_device_attach(alx->dev);
+
+	return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume);

From bf9279cd63dcc144b2a3c4c76d8b6b4c30b05c22 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 5 Mar 2021 10:14:48 +0100
Subject: [PATCH 139/164] net: dsa: bcm_sf2: simplify optional reset handling

As of commit bb475230b8e5 ("reset: make optional functions really
optional"), the reset framework API calls use NULL pointers to describe
optional, non-present reset controls.

This allows to unconditionally return errors from
devm_reset_control_get_optional_exclusive.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 5ee8103b8e9c..f277df922fcd 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -406,7 +406,7 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv)
 	/* The watchdog reset does not work on 7278, we need to hit the
 	 * "external" reset line through the reset controller.
 	 */
-	if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev)) {
+	if (priv->type == BCM7278_DEVICE_ID) {
 		ret = reset_control_assert(priv->rcdev);
 		if (ret)
 			return ret;
@@ -1265,7 +1265,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 
 	priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev,
 								"switch");
-	if (PTR_ERR(priv->rcdev) == -EPROBE_DEFER)
+	if (IS_ERR(priv->rcdev))
 		return PTR_ERR(priv->rcdev);
 
 	/* Auto-detection using standard registers will not work, so
@@ -1426,7 +1426,7 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
 	bcm_sf2_mdio_unregister(priv);
 	clk_disable_unprepare(priv->clk_mdiv);
 	clk_disable_unprepare(priv->clk);
-	if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev))
+	if (priv->type == BCM7278_DEVICE_ID)
 		reset_control_assert(priv->rcdev);
 
 	return 0;

From a4813dc7baa4898f66c84ef68274bbbd1a0ae224 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sun, 7 Mar 2021 02:50:28 +0530
Subject: [PATCH 140/164] net: ethernet: chelsio: inline_crypto: Mundane typos
 fixed throughout the file chcr_ktls.c

Mundane typos fixes throughout the file.

s/establised/established/
s/availbale/available/
s/vaues/values/
s/Incase/In case/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index 46a809f2aeca..169e10c91378 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -672,7 +672,7 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap,
 	if (tx_info->pending_close) {
 		spin_unlock(&tx_info->lock);
 		if (!status) {
-			/* it's a late success, tcb status is establised,
+			/* it's a late success, tcb status is established,
 			 * mark it close.
 			 */
 			chcr_ktls_mark_tcb_close(tx_info);
@@ -930,7 +930,7 @@ chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len)
 }
 
 /*
- * chcr_ktls_check_tcp_options: To check if there is any TCP option availbale
+ * chcr_ktls_check_tcp_options: To check if there is any TCP option available
  * other than timestamp.
  * @skb - skb contains partial record..
  * return: 1 / 0
@@ -1115,7 +1115,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
 	}
 
 	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
-		/* Credits are below the threshold vaues, stop the queue after
+		/* Credits are below the threshold values, stop the queue after
 		 * injecting the Work Request for this packet.
 		 */
 		chcr_eth_txq_stop(q);
@@ -2006,7 +2006,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* TCP segments can be in received either complete or partial.
 	 * chcr_end_part_handler will handle cases if complete record or end
-	 * part of the record is received. Incase of partial end part of record,
+	 * part of the record is received. In case of partial end part of record,
 	 * we will send the complete record again.
 	 */
 

From 492bbe7f8a43ff20bb9bfc6b98220dcfb7e5992f Mon Sep 17 00:00:00 2001
From: Grant Grundler <grundler@chromium.org>
Date: Sat, 6 Mar 2021 14:12:31 -0800
Subject: [PATCH 141/164] net: usb: cdc_ncm: emit dev_err on error paths

Several error paths in bind/probe code will only emit
output using dev_dbg. But if we are going to fail the
bind/probe, emit related output with "err" priority.

Signed-off-by: Grant Grundler <grundler@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 4087c9e33781..8acf30115428 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -851,17 +851,17 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
 	/* check if we got everything */
 	if (!ctx->data) {
-		dev_dbg(&intf->dev, "CDC Union missing and no IAD found\n");
+		dev_err(&intf->dev, "CDC Union missing and no IAD found\n");
 		goto error;
 	}
 	if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) {
 		if (!ctx->mbim_desc) {
-			dev_dbg(&intf->dev, "MBIM functional descriptor missing\n");
+			dev_err(&intf->dev, "MBIM functional descriptor missing\n");
 			goto error;
 		}
 	} else {
 		if (!ctx->ether_desc || !ctx->func_desc) {
-			dev_dbg(&intf->dev, "NCM or ECM functional descriptors missing\n");
+			dev_err(&intf->dev, "NCM or ECM functional descriptors missing\n");
 			goto error;
 		}
 	}
@@ -870,7 +870,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	if (ctx->data != ctx->control) {
 		temp = usb_driver_claim_interface(driver, ctx->data, dev);
 		if (temp) {
-			dev_dbg(&intf->dev, "failed to claim data intf\n");
+			dev_err(&intf->dev, "failed to claim data intf\n");
 			goto error;
 		}
 	}
@@ -926,7 +926,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	if (ctx->ether_desc) {
 		temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress);
 		if (temp) {
-			dev_dbg(&intf->dev, "failed to get mac address\n");
+			dev_err(&intf->dev, "failed to get mac address\n");
 			goto error2;
 		}
 		dev_info(&intf->dev, "MAC-Address: %pM\n", dev->net->dev_addr);

From 4d8c79b7e9ff05030aad68421f7584b129933ba6 Mon Sep 17 00:00:00 2001
From: Grant Grundler <grundler@chromium.org>
Date: Sat, 6 Mar 2021 14:12:32 -0800
Subject: [PATCH 142/164] net: usb: log errors to dmesg/syslog

Errors in protocol should be logged when the driver aborts operations.
If the driver can carry on and "humor" the device, then emitting
the message as debug output level is fine.

Signed-off-by: Grant Grundler <grundler@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index b4c8080e6f87..f4f37ecfed58 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -887,7 +887,7 @@ int usbnet_open (struct net_device *net)
 
 	// insist peer be connected
 	if (info->check_connect && (retval = info->check_connect (dev)) < 0) {
-		netif_dbg(dev, ifup, dev->net, "can't open; %d\n", retval);
+		netif_err(dev, ifup, dev->net, "can't open; %d\n", retval);
 		goto done;
 	}
 

From 143c253f42bad20357e7e4432087aca747c43384 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sun, 7 Mar 2021 00:40:12 -0800
Subject: [PATCH 143/164] net: hisilicon: hns: fix error return code of
 hns_nic_clear_all_rx_fetch()

When hns_assemble_skb() returns NULL to skb, no error return code of
hns_nic_clear_all_rx_fetch() is assigned.
To fix this bug, ret is assigned with -ENOMEM in this case.

Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 5d7824d2b4d4..c66a7a51198e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1663,8 +1663,10 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev)
 			for (j = 0; j < fetch_num; j++) {
 				/* alloc one skb and init */
 				skb = hns_assemble_skb(ndev);
-				if (!skb)
+				if (!skb) {
+					ret = -ENOMEM;
 					goto out;
+				}
 				rd = &tx_ring_data(priv, skb->queue_mapping);
 				hns_nic_net_xmit_hw(ndev, skb, rd);
 

From 62765d39553cfd1ad340124fe1e280450e8c89e2 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sun, 7 Mar 2021 01:12:56 -0800
Subject: [PATCH 144/164] net: wan: fix error return code of uhdlc_init()

When priv->rx_skbuff or priv->tx_skbuff is NULL, no error return code of
uhdlc_init() is assigned.
To fix this bug, ret is assigned with -ENOMEM in these cases.

Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/fsl_ucc_hdlc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index dca97cd7c4e7..7eac6a3e1cde 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -204,14 +204,18 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
 	priv->rx_skbuff = kcalloc(priv->rx_ring_size,
 				  sizeof(*priv->rx_skbuff),
 				  GFP_KERNEL);
-	if (!priv->rx_skbuff)
+	if (!priv->rx_skbuff) {
+		ret = -ENOMEM;
 		goto free_ucc_pram;
+	}
 
 	priv->tx_skbuff = kcalloc(priv->tx_ring_size,
 				  sizeof(*priv->tx_skbuff),
 				  GFP_KERNEL);
-	if (!priv->tx_skbuff)
+	if (!priv->tx_skbuff) {
+		ret = -ENOMEM;
 		goto free_rx_skbuff;
+	}
 
 	priv->skb_curtx = 0;
 	priv->skb_dirtytx = 0;

From 03cbb87054c17b50a6ead63ed3ab02e094a785b1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 7 Mar 2021 12:21:56 +0200
Subject: [PATCH 145/164] net: dsa: fix switchdev objects on bridge master
 mistakenly being applied on ports

Tobias reports that after the blamed patch, VLAN objects being added to
a bridge device are being added to all slave ports instead (swp2, swp3).

ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
ip link set swp3 master br0
bridge vlan add dev br0 vid 100 self

This is because the fix was too broad: we made dsa_port_offloads_netdev
say "yes, I offload the br0 bridge" for all slave ports, but we didn't
add the checks whether the switchdev object was in fact meant for the
physical port or for the bridge itself. So we are reacting on events in
a way in which we shouldn't.

The reason why the fix was too broad is because the question itself,
"does this DSA port offload this netdev", was too broad in the first
place. The solution is to disambiguate the question and separate it into
two different functions, one to be called for each switchdev attribute /
object that has an orig_dev == net_bridge (dsa_port_offloads_bridge),
and the other for orig_dev == net_bridge_port (*_offloads_bridge_port).

In the case of VLAN objects on the bridge interface, this solves the
problem because we know that VLAN objects are per bridge port and not
per bridge. And when orig_dev is equal to the net_bridge, we offload it
as a bridge, but not as a bridge port; that's how we are able to skip
reacting on those events. Note that this is compatible with future plans
to have explicit offloading of VLAN objects on the bridge interface as a
bridge port (in DSA, this signifies that we should add that VLAN towards
the CPU port).

Fixes: 99b8202b179f ("net: dsa: fix SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING getting ignored")
Reported-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Tested-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h | 25 +++++++++++---------
 net/dsa/slave.c    | 59 +++++++++++++++++++++++++++++++++-------------
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2eeaa42f2e08..9d4b0e9b1aa1 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -230,8 +230,8 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
 void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
 extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 
-static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
-					    struct net_device *dev)
+static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
+						 struct net_device *dev)
 {
 	/* Switchdev offloading can be configured on: */
 
@@ -241,12 +241,6 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
 		 */
 		return true;
 
-	if (dp->bridge_dev == dev)
-		/* DSA ports connected to a bridge, and event was emitted
-		 * for the bridge.
-		 */
-		return true;
-
 	if (dp->lag_dev == dev)
 		/* DSA ports connected to a bridge via a LAG */
 		return true;
@@ -254,14 +248,23 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
 	return false;
 }
 
+static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
+					    struct net_device *bridge_dev)
+{
+	/* DSA ports connected to a bridge, and event was emitted
+	 * for the bridge.
+	 */
+	return dp->bridge_dev == bridge_dev;
+}
+
 /* Returns true if any port of this tree offloads the given net_device */
-static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst,
-					    struct net_device *dev)
+static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
+						 struct net_device *dev)
 {
 	struct dsa_port *dp;
 
 	list_for_each_entry(dp, &dst->ports, list)
-		if (dsa_port_offloads_netdev(dp, dev))
+		if (dsa_port_offloads_bridge_port(dp, dev))
 			return true;
 
 	return false;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 491e3761b5f4..992fcab4b552 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -278,28 +278,43 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int ret;
 
-	if (!dsa_port_offloads_netdev(dp, attr->orig_dev))
-		return -EOPNOTSUPP;
-
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
 		ret = dsa_port_set_state(dp, attr->u.stp_state);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
 		ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
 					      extack);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+		if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
 		ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
 		break;
 	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
 		ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
 						extack);
 		break;
 	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
 		ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+		if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
 		ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
 		break;
 	default:
@@ -341,9 +356,6 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	struct switchdev_obj_port_vlan vlan;
 	int err;
 
-	if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
-		return -EOPNOTSUPP;
-
 	if (dsa_port_skip_vlan_configuration(dp)) {
 		NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
 		return 0;
@@ -391,27 +403,36 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
+
 		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
+			return -EOPNOTSUPP;
+
 		/* DSA can directly translate this to a normal MDB add,
 		 * but on the CPU port.
 		 */
 		err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+			return -EOPNOTSUPP;
+
 		err = dsa_slave_vlan_add(dev, obj, extack);
 		break;
 	case SWITCHDEV_OBJ_ID_MRP:
-		if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
+
 		err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
-		if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
+
 		err = dsa_port_mrp_add_ring_role(dp,
 						 SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
 		break;
@@ -431,9 +452,6 @@ static int dsa_slave_vlan_del(struct net_device *dev,
 	struct switchdev_obj_port_vlan *vlan;
 	int err;
 
-	if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
-		return -EOPNOTSUPP;
-
 	if (dsa_port_skip_vlan_configuration(dp))
 		return 0;
 
@@ -459,27 +477,36 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
+
 		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
+			return -EOPNOTSUPP;
+
 		/* DSA can directly translate this to a normal MDB add,
 		 * but on the CPU port.
 		 */
 		err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+			return -EOPNOTSUPP;
+
 		err = dsa_slave_vlan_del(dev, obj);
 		break;
 	case SWITCHDEV_OBJ_ID_MRP:
-		if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
+
 		err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
-		if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
+
 		err = dsa_port_mrp_del_ring_role(dp,
 						 SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
 		break;
@@ -2298,7 +2325,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 			 * other ports bridged with the LAG should be able to
 			 * autonomously forward towards it.
 			 */
-			if (dsa_tree_offloads_netdev(dp->ds->dst, dev))
+			if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev))
 				return NOTIFY_DONE;
 		}
 

From ac88c531a5b38877eba2365a3f28f0c8b513dc33 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Sun, 7 Mar 2021 13:17:47 +0000
Subject: [PATCH 146/164] net: davicom: Fix regulator not turned off on failed
 probe

When the probe fails or requests to be defered, we must disable the
regulator that was previously enabled.

Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000")
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/davicom/dm9000.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 3fdc70dab5c1..ae744826bb9e 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1449,7 +1449,7 @@ dm9000_probe(struct platform_device *pdev)
 		if (ret) {
 			dev_err(dev, "failed to request reset gpio %d: %d\n",
 				reset_gpios, ret);
-			return -ENODEV;
+			goto out_regulator_disable;
 		}
 
 		/* According to manual PWRST# Low Period Min 1ms */
@@ -1461,8 +1461,10 @@ dm9000_probe(struct platform_device *pdev)
 
 	if (!pdata) {
 		pdata = dm9000_parse_dt(&pdev->dev);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto out_regulator_disable;
+		}
 	}
 
 	/* Init network device */
@@ -1703,6 +1705,10 @@ out:
 	dm9000_release_board(pdev, db);
 	free_netdev(ndev);
 
+out_regulator_disable:
+	if (!IS_ERR(power))
+		regulator_disable(power);
+
 	return ret;
 }
 

From cf9e60aa69ae6c40d3e3e4c94dd6c8de31674e9b Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Sun, 7 Mar 2021 13:17:48 +0000
Subject: [PATCH 147/164] net: davicom: Fix regulator not turned off on driver
 removal

We must disable the regulator that was enabled in the probe function.

Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000")
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/davicom/dm9000.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index ae744826bb9e..a95e95ce9438 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -133,6 +133,8 @@ struct board_info {
 	u32		wake_state;
 
 	int		ip_summed;
+
+	struct regulator *power_supply;
 };
 
 /* debug code */
@@ -1481,6 +1483,8 @@ dm9000_probe(struct platform_device *pdev)
 
 	db->dev = &pdev->dev;
 	db->ndev = ndev;
+	if (!IS_ERR(power))
+		db->power_supply = power;
 
 	spin_lock_init(&db->lock);
 	mutex_init(&db->addr_lock);
@@ -1766,10 +1770,13 @@ static int
 dm9000_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct board_info *dm = to_dm9000_board(ndev);
 
 	unregister_netdev(ndev);
-	dm9000_release_board(pdev, netdev_priv(ndev));
+	dm9000_release_board(pdev, dm);
 	free_netdev(ndev);		/* free device structure */
+	if (dm->power_supply)
+		regulator_disable(dm->power_supply);
 
 	dev_dbg(&pdev->dev, "released and freed device\n");
 	return 0;

From 2e2696223676d56db1a93acfca722c1b96cd552d Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Sun, 7 Mar 2021 13:17:49 +0000
Subject: [PATCH 148/164] net: davicom: Use platform_get_irq_optional()

The second IRQ line really is optional, so use
platform_get_irq_optional() to obtain it.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/davicom/dm9000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index a95e95ce9438..252adfa5d837 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1507,7 +1507,7 @@ dm9000_probe(struct platform_device *pdev)
 		goto out;
 	}
 
-	db->irq_wake = platform_get_irq(pdev, 1);
+	db->irq_wake = platform_get_irq_optional(pdev, 1);
 	if (db->irq_wake >= 0) {
 		dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake);
 

From 1b2395dfff5bb40228a187f21f577cd90673d344 Mon Sep 17 00:00:00 2001
From: Alex Marginean <alexandru.marginean@nxp.com>
Date: Sun, 7 Mar 2021 15:23:38 +0200
Subject: [PATCH 149/164] net: enetc: set MAC RX FIFO to recommended value

On LS1028A, the MAC RX FIFO defaults to the value 2, which is too high
and may lead to RX lock-up under traffic at a rate higher than 6 Gbps.
Set it to 1 instead, as recommended by the hardware design team and by
later versions of the ENETC block guide.

Signed-off-by: Alex Marginean <alexandru.marginean@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Jason Liu <jason.hui.liu@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_hw.h | 2 ++
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index de0d20b0f489..00938f7960a4 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -234,6 +234,8 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_MAXFRM	0x8014
 #define ENETC_SET_TX_MTU(val)	((val) << 16)
 #define ENETC_SET_MAXFRM(val)	((val) & 0xffff)
+#define ENETC_PM0_RX_FIFO	0x801c
+#define ENETC_PM0_RX_FIFO_VAL	1
 
 #define ENETC_PM_IMDIO_BASE	0x8030
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index ca02f033bea2..224fc37a6757 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -490,6 +490,12 @@ static void enetc_configure_port_mac(struct enetc_hw *hw)
 
 	enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN |
 		      ENETC_PM0_CMD_TXP	| ENETC_PM0_PROMISC);
+
+	/* On LS1028A, the MAC RX FIFO defaults to 2, which is too high
+	 * and may lead to RX lock-up under traffic. Set it to 1 instead,
+	 * as recommended by the hardware team.
+	 */
+	enetc_port_wr(hw, ENETC_PM0_RX_FIFO, ENETC_PM0_RX_FIFO_VAL);
 }
 
 static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode)

From 29d98f54a4fe1b6a9089bec8715a1b89ff9ad59c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 7 Mar 2021 15:23:39 +0200
Subject: [PATCH 150/164] net: enetc: allow hardware timestamping on TX queues
 with tc-etf enabled

The txtime is passed to the driver in skb->skb_mstamp_ns, which is
actually in a union with skb->tstamp (the place where software
timestamps are kept).

Since commit b50a5c70ffa4 ("net: allow simultaneous SW and HW transmit
timestamping"), __sock_recv_timestamp has some logic for making sure
that the two calls to skb_tstamp_tx:

skb_tx_timestamp(skb) # Software timestamp in the driver
-> skb_tstamp_tx(skb, NULL)

and

skb_tstamp_tx(skb, &shhwtstamps) # Hardware timestamp in the driver

will both do the right thing and in a race-free manner, meaning that
skb_tx_timestamp will deliver a cmsg with the software timestamp only,
and skb_tstamp_tx with a non-NULL hwtstamps argument will deliver a cmsg
with the hardware timestamp only.

Why are races even possible? Well, because although the software timestamp
skb->tstamp is private per skb, the hardware timestamp skb_hwtstamps(skb)
lives in skb_shinfo(skb), an area which is shared between skbs and their
clones. And skb_tstamp_tx works by cloning the packets when timestamping
them, therefore attempting to perform hardware timestamping on an skb's
clone will also change the hardware timestamp of the original skb. And
the original skb might have been yet again cloned for software
timestamping, at an earlier stage.

So the logic in __sock_recv_timestamp can't be as simple as saying
"does this skb have a hardware timestamp? if yes I'll send the hardware
timestamp to the socket, otherwise I'll send the software timestamp",
precisely because the hardware timestamp is shared.
Instead, it's quite the other way around: __sock_recv_timestamp says
"does this skb have a software timestamp? if yes, I'll send the software
timestamp, otherwise the hardware one". This works because the software
timestamp is not shared with clones.

But that means we have a problem when we attempt hardware timestamping
with skbs that don't have the skb->tstamp == 0. __sock_recv_timestamp
will say "oh, yeah, this must be some sort of odd clone" and will not
deliver the hardware timestamp to the socket. And this is exactly what
is happening when we have txtime enabled on the socket: as mentioned,
that is put in a union with skb->tstamp, so it is quite easy to mistake
it.

Do what other drivers do (intel igb/igc) and write zero to skb->tstamp
before taking the hardware timestamp. It's of no use to us now (we're
already on the TX confirmation path).

Fixes: 0d08c9ec7d6e ("enetc: add support time specific departure base on the qos etf")
Cc: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 30d7d4e83900..09471329f3a3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -344,6 +344,12 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
 	if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
 		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 		shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+		/* Ensure skb_mstamp_ns, which might have been populated with
+		 * the txtime, is not mistaken for a software timestamp,
+		 * because this will prevent the dispatch of our hardware
+		 * timestamp to the socket.
+		 */
+		skb->tstamp = ktime_set(0, 0);
 		skb_tstamp_tx(skb, &shhwtstamps);
 	}
 }

From 2055a99da8a253a357bdfd359b3338ef3375a26c Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sun, 7 Mar 2021 19:11:02 -0800
Subject: [PATCH 151/164] net: bonding: fix error return code of
 bond_neigh_init()

When slave is NULL or slave_ops->ndo_neigh_setup is NULL, no error
return code of bond_neigh_init() is assigned.
To fix this bug, ret is assigned with -EINVAL in these cases.

Fixes: 9e99bfefdbce ("bonding: fix bond_neigh_init()")
Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 74cbbb22470b..456315bef3a8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3978,11 +3978,15 @@ static int bond_neigh_init(struct neighbour *n)
 
 	rcu_read_lock();
 	slave = bond_first_slave_rcu(bond);
-	if (!slave)
+	if (!slave) {
+		ret = -EINVAL;
 		goto out;
+	}
 	slave_ops = slave->dev->netdev_ops;
-	if (!slave_ops->ndo_neigh_setup)
+	if (!slave_ops->ndo_neigh_setup) {
+		ret = -EINVAL;
 		goto out;
+	}
 
 	/* TODO: find another way [1] to implement this.
 	 * Passing a zeroed structure is fragile,

From 27ab92d9996e4e003a726d22c56d780a1655d6b4 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 8 Mar 2021 10:00:04 +0100
Subject: [PATCH 152/164] mptcp: fix length of ADD_ADDR with port sub-option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

in current Linux, MPTCP peers advertising endpoints with port numbers use
a sub-option length that wrongly accounts for the trailing TCP NOP. Also,
receivers will only process incoming ADD_ADDR with port having such wrong
sub-option length. Fix this, making ADD_ADDR compliant to RFC8684 §3.4.1.

this can be verified running tcpdump on the kselftests artifacts:

 unpatched kernel:
 [root@bottarga mptcp]# tcpdump -tnnr unpatched.pcap | grep add-addr
 reading from file unpatched.pcap, link-type LINUX_SLL (Linux cooked v1), snapshot length 65535
 IP 10.0.1.1.10000 > 10.0.1.2.53078: Flags [.], ack 101, win 509, options [nop,nop,TS val 214459678 ecr 521312851,mptcp add-addr v1 id 1 a00:201:2774:2d88:7436:85c3:17fd:101], length 0
 IP 10.0.1.2.53078 > 10.0.1.1.10000: Flags [.], ack 101, win 502, options [nop,nop,TS val 521312852 ecr 214459678,mptcp add-addr[bad opt]]

 patched kernel:
 [root@bottarga mptcp]# tcpdump -tnnr patched.pcap | grep add-addr
 reading from file patched.pcap, link-type LINUX_SLL (Linux cooked v1), snapshot length 65535
 IP 10.0.1.1.10000 > 10.0.1.2.38178: Flags [.], ack 101, win 509, options [nop,nop,TS val 3728873902 ecr 2732713192,mptcp add-addr v1 id 1 10.0.2.1:10100 hmac 0xbccdfcbe59292a1f,nop,nop], length 0
 IP 10.0.1.2.38178 > 10.0.1.1.10000: Flags [.], ack 101, win 502, options [nop,nop,TS val 2732713195 ecr 3728873902,mptcp add-addr v1-echo id 1 10.0.2.1:10100,nop,nop], length 0

Fixes: 22fb85ffaefb ("mptcp: add port support for ADD_ADDR suboption writing")
CC: stable@vger.kernel.org # 5.11+
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Acked-and-tested-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 91827d949766..e21a5bc36cf0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -52,14 +52,15 @@
 #define TCPOLEN_MPTCP_DSS_MAP64		14
 #define TCPOLEN_MPTCP_DSS_CHECKSUM	2
 #define TCPOLEN_MPTCP_ADD_ADDR		16
-#define TCPOLEN_MPTCP_ADD_ADDR_PORT	20
+#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
 #define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
-#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	12
+#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
 #define TCPOLEN_MPTCP_ADD_ADDR6		28
-#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	32
+#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
-#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	24
-#define TCPOLEN_MPTCP_PORT_LEN		4
+#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
+#define TCPOLEN_MPTCP_PORT_LEN		2
+#define TCPOLEN_MPTCP_PORT_ALIGN	2
 #define TCPOLEN_MPTCP_RM_ADDR_BASE	4
 #define TCPOLEN_MPTCP_PRIO		3
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
@@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
 		len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
 	if (!echo)
 		len += MPTCPOPT_THMAC_LEN;
+	/* account for 2 trailing 'nop' options */
 	if (port)
-		len += TCPOLEN_MPTCP_PORT_LEN;
+		len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;
 
 	return len;
 }

From 179d0ba0c454057a65929c46af0d6ad986754781 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Mon, 8 Mar 2021 01:13:55 -0800
Subject: [PATCH 153/164] net: qrtr: fix error return code of qrtr_sendmsg()

When sock_alloc_send_skb() returns NULL to skb, no error return code of
qrtr_sendmsg() is assigned.
To fix this bug, rc is assigned with -ENOMEM in this case.

Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets")
Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 82d2eb8c21d1..edb6ac17ceca 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -958,8 +958,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	plen = (len + 3) & ~3;
 	skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
 				  msg->msg_flags & MSG_DONTWAIT, &rc);
-	if (!skb)
+	if (!skb) {
+		rc = -ENOMEM;
 		goto out_node;
+	}
 
 	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
 

From 1019d7923d9d4cc878a1a85d4fc2d6619cfe1a6a Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Sun, 7 Mar 2021 22:25:28 -0500
Subject: [PATCH 154/164] atm: fix a typo in the struct description

phy_data means private PHY data not date

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 60cd25c0461b..9b02961d65ee 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -151,7 +151,7 @@ struct atm_dev {
 	const char	*type;		/* device type name */
 	int		number;		/* device index */
 	void		*dev_data;	/* per-device data */
-	void		*phy_data;	/* private PHY date */
+	void		*phy_data;	/* private PHY data */
 	unsigned long	flags;		/* device flags (ATM_DF_*) */
 	struct list_head local;		/* local ATM addresses */
 	struct list_head lecs;		/* LECS ATM addresses learned via ILMI */

From 3153724fc084d8ef640c611f269ddfb576d1dcb1 Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Sun, 7 Mar 2021 22:25:29 -0500
Subject: [PATCH 155/164] atm: uPD98402: fix incorrect allocation

dev->dev_data is set in zatm.c, calling zatm_start() will overwrite this
dev->dev_data in uPD98402_start() and a subsequent PRIV(dev)->lock
(i.e dev->phy_data->lock) will result in a null-ptr-dereference.

I believe this is a typo and what it actually want to do is to allocate
phy_data instead of dev_data.

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/uPD98402.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c
index 7850758b5bb8..239852d85558 100644
--- a/drivers/atm/uPD98402.c
+++ b/drivers/atm/uPD98402.c
@@ -211,7 +211,7 @@ static void uPD98402_int(struct atm_dev *dev)
 static int uPD98402_start(struct atm_dev *dev)
 {
 	DPRINTK("phy_start\n");
-	if (!(dev->dev_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL)))
+	if (!(dev->phy_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL)))
 		return -ENOMEM;
 	spin_lock_init(&PRIV(dev)->lock);
 	memset(&PRIV(dev)->sonet_stats,0,sizeof(struct k_sonet_stats));

From 4416e98594dc04590ebc498fc4e530009535c511 Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Sun, 7 Mar 2021 22:25:30 -0500
Subject: [PATCH 156/164] atm: idt77252: fix null-ptr-dereference

this one is similar to the phy_data allocation fix in uPD98402, the
driver allocate the idt77105_priv and store to dev_data but later
dereference using dev->dev_data, which will cause null-ptr-dereference.

fix this issue by changing dev_data to phy_data so that PRIV(dev) can
work correctly.

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/idt77105.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c
index 3c081b6171a8..bfca7b8a6f31 100644
--- a/drivers/atm/idt77105.c
+++ b/drivers/atm/idt77105.c
@@ -262,7 +262,7 @@ static int idt77105_start(struct atm_dev *dev)
 {
 	unsigned long flags;
 
-	if (!(dev->dev_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL)))
+	if (!(dev->phy_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL)))
 		return -ENOMEM;
 	PRIV(dev)->dev = dev;
 	spin_lock_irqsave(&idt77105_priv_lock, flags);
@@ -337,7 +337,7 @@ static int idt77105_stop(struct atm_dev *dev)
                 else
                     idt77105_all = walk->next;
 	        dev->phy = NULL;
-                dev->dev_data = NULL;
+                dev->phy_data = NULL;
                 kfree(walk);
                 break;
             }

From f7d9d4854519fdf4d45c70a4d953438cd88e7e58 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Sun, 7 Mar 2021 03:33:07 -0800
Subject: [PATCH 157/164] net: lapbether: Remove netif_start_queue /
 netif_stop_queue

For the devices in this driver, the default qdisc is "noqueue",
because their "tx_queue_len" is 0.

In function "__dev_queue_xmit" in "net/core/dev.c", devices with the
"noqueue" qdisc are specially handled. Packets are transmitted without
being queued after a "dev->flags & IFF_UP" check. However, it's possible
that even if this check succeeds, "ops->ndo_stop" may still have already
been called. This is because in "__dev_close_many", "ops->ndo_stop" is
called before clearing the "IFF_UP" flag.

If we call "netif_stop_queue" in "ops->ndo_stop", then it's possible in
"__dev_queue_xmit", it sees the "IFF_UP" flag is present, and then it
checks "netif_xmit_stopped" and finds that the queue is already stopped.
In this case, it will complain that:
"Virtual device ... asks to queue packet!"

To prevent "__dev_queue_xmit" from generating this complaint, we should
not call "netif_stop_queue" in "ops->ndo_stop".

We also don't need to call "netif_start_queue" in "ops->ndo_open",
because after a netdev is allocated and registered, the
"__QUEUE_STATE_DRV_XOFF" flag is initially not set, so there is no need
to call "netif_start_queue" to clear it.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Acked-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 605fe555e157..c3372498f4f1 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -292,7 +292,6 @@ static int lapbeth_open(struct net_device *dev)
 		return -ENODEV;
 	}
 
-	netif_start_queue(dev);
 	return 0;
 }
 
@@ -300,8 +299,6 @@ static int lapbeth_close(struct net_device *dev)
 {
 	int err;
 
-	netif_stop_queue(dev);
-
 	if ((err = lapb_unregister(dev)) != LAPB_OK)
 		pr_err("lapb_unregister error: %d\n", err);
 

From 286a8624d7f9c6505cd568d947772eb59646514b Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Mon, 8 Mar 2021 17:38:22 -0600
Subject: [PATCH 158/164] net: dsa: xrs700x: check if partner is same as port
 in hsr join

Don't assign dp to partner if it's the same port that xrs700x_hsr_join
was called with. The partner port is supposed to be the other port in
the HSR/PRP redundant pair not the same port. This fixes an issue
observed in testing where forwarding between redundant HSR ports on this
switch didn't work depending on the order the ports were added to the
hsr device.

Fixes: bd62e6f5e6a9 ("net: dsa: xrs700x: add HSR offloading support")
Signed-off-by: George McCollister <george.mccollister@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/xrs700x/xrs700x.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
index f025f968f96d..fde6e99274b6 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.c
+++ b/drivers/net/dsa/xrs700x/xrs700x.c
@@ -528,7 +528,10 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port,
 		return -EOPNOTSUPP;
 
 	dsa_hsr_foreach_port(dp, ds, hsr) {
-		partner = dp;
+		if (dp->index != port) {
+			partner = dp;
+			break;
+		}
 	}
 
 	/* We can't enable redundancy on the switch until both
@@ -582,7 +585,10 @@ static int xrs700x_hsr_leave(struct dsa_switch *ds, int port,
 	unsigned int val;
 
 	dsa_hsr_foreach_port(dp, ds, hsr) {
-		partner = dp;
+		if (dp->index != port) {
+			partner = dp;
+			break;
+		}
 	}
 
 	if (!partner)

From 924a9bc362a5223cd448ca08c3dde21235adc310 Mon Sep 17 00:00:00 2001
From: Balazs Nemeth <bnemeth@redhat.com>
Date: Tue, 9 Mar 2021 12:31:00 +0100
Subject: [PATCH 159/164] net: check if protocol extracted by
 virtio_net_hdr_set_proto is correct

For gso packets, virtio_net_hdr_set_proto sets the protocol (if it isn't
set) based on the type in the virtio net hdr, but the skb could contain
anything since it could come from packet_snd through a raw socket. If
there is a mismatch between what virtio_net_hdr_set_proto sets and
the actual protocol, then the skb could be handled incorrectly later
on.

An example where this poses an issue is with the subsequent call to
skb_flow_dissect_flow_keys_basic which relies on skb->protocol being set
correctly. A specially crafted packet could fool
skb_flow_dissect_flow_keys_basic preventing EINVAL to be returned.

Avoid blindly trusting the information provided by the virtio net header
by checking that the protocol in the packet actually matches the
protocol set by virtio_net_hdr_set_proto. Note that since the protocol
is only checked if skb->dev implements header_ops->parse_protocol,
packets from devices without the implementation are not checked at this
stage.

Fixes: 9274124f023b ("net: stricter validation of untrusted gso packets")
Signed-off-by: Balazs Nemeth <bnemeth@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index e8a924eeea3d..6b5fcfa1e555 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -79,8 +79,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 		if (gso_type && skb->network_header) {
 			struct flow_keys_basic keys;
 
-			if (!skb->protocol)
+			if (!skb->protocol) {
+				__be16 protocol = dev_parse_header_protocol(skb);
+
 				virtio_net_hdr_set_proto(skb, hdr);
+				if (protocol && protocol != skb->protocol)
+					return -EINVAL;
+			}
 retry:
 			if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
 							      NULL, 0, 0, 0,

From d348ede32e99d3a04863e9f9b28d224456118c27 Mon Sep 17 00:00:00 2001
From: Balazs Nemeth <bnemeth@redhat.com>
Date: Tue, 9 Mar 2021 12:31:01 +0100
Subject: [PATCH 160/164] net: avoid infinite loop in mpls_gso_segment when
 mpls_hlen == 0

A packet with skb_inner_network_header(skb) == skb_network_header(skb)
and ETH_P_MPLS_UC will prevent mpls_gso_segment from pulling any headers
from the packet. Subsequently, the call to skb_mac_gso_segment will
again call mpls_gso_segment with the same packet leading to an infinite
loop. In addition, ensure that the header length is a multiple of four,
which should hold irrespective of the number of stacked labels.

Signed-off-by: Balazs Nemeth <bnemeth@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/mpls_gso.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index b1690149b6fa..1482259de9b5 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -14,6 +14,7 @@
 #include <linux/netdev_features.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <net/mpls.h>
 
 static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 				       netdev_features_t features)
@@ -27,6 +28,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 
 	skb_reset_network_header(skb);
 	mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
+	if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN))
+		goto out;
 	if (unlikely(!pskb_may_pull(skb, mpls_hlen)))
 		goto out;
 

From e7a36d27f6b9f389e41d8189a8a08919c6835732 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 9 Mar 2021 17:52:18 +0100
Subject: [PATCH 161/164] s390/qeth: fix memory leak after failed TX Buffer
 allocation

When qeth_alloc_qdio_queues() fails to allocate one of the buffers that
back an Output Queue, the 'out_freeoutqbufs' path will free all
previously allocated buffers for this queue. But it misses to free the
half-finished queue struct itself.

Move the buffer allocation into qeth_alloc_output_queue(), and deal with
such errors internally.

Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 35 +++++++++++++++----------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index b71b8902d1c4..f7bc0ca6909b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2634,15 +2634,28 @@ static void qeth_free_output_queue(struct qeth_qdio_out_q *q)
 static struct qeth_qdio_out_q *qeth_alloc_output_queue(void)
 {
 	struct qeth_qdio_out_q *q = kzalloc(sizeof(*q), GFP_KERNEL);
+	unsigned int i;
 
 	if (!q)
 		return NULL;
 
-	if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) {
-		kfree(q);
-		return NULL;
+	if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q))
+		goto err_qdio_bufs;
+
+	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) {
+		if (qeth_init_qdio_out_buf(q, i))
+			goto err_out_bufs;
 	}
+
 	return q;
+
+err_out_bufs:
+	while (i > 0)
+		kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]);
+	qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+err_qdio_bufs:
+	kfree(q);
+	return NULL;
 }
 
 static void qeth_tx_completion_timer(struct timer_list *timer)
@@ -2655,7 +2668,7 @@ static void qeth_tx_completion_timer(struct timer_list *timer)
 
 static int qeth_alloc_qdio_queues(struct qeth_card *card)
 {
-	int i, j;
+	unsigned int i;
 
 	QETH_CARD_TEXT(card, 2, "allcqdbf");
 
@@ -2689,13 +2702,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
 		queue->coalesce_usecs = QETH_TX_COALESCE_USECS;
 		queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES;
 		queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT;
-
-		/* give outbound qeth_qdio_buffers their qdio_buffers */
-		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
-			WARN_ON(queue->bufs[j]);
-			if (qeth_init_qdio_out_buf(queue, j))
-				goto out_freeoutqbufs;
-		}
 	}
 
 	/* completion */
@@ -2704,13 +2710,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
 
 	return 0;
 
-out_freeoutqbufs:
-	while (j > 0) {
-		--j;
-		kmem_cache_free(qeth_qdio_outbuf_cache,
-				card->qdio.out_qs[i]->bufs[j]);
-		card->qdio.out_qs[i]->bufs[j] = NULL;
-	}
 out_freeoutq:
 	while (i > 0) {
 		qeth_free_output_queue(card->qdio.out_qs[--i]);

From c20383ad1656b0f6354dd50e4acd894f9d94090d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 9 Mar 2021 17:52:19 +0100
Subject: [PATCH 162/164] s390/qeth: improve completion of pending TX buffers

The current design attaches a pending TX buffer to a custom
single-linked list, which is anchored at the buffer's slot on the
TX ring. The buffer is then checked for final completion whenever
this slot is processed during a subsequent TX NAPI poll cycle.

But if there's insufficient traffic on the ring, we might never make
enough progress to get back to this ring slot and discover the pending
buffer's final TX completion. In particular if this missing TX
completion blocks the application from sending further traffic.

So convert the custom single-linked list code to a per-queue list_head,
and scan this list on every TX NAPI cycle.

Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  3 +-
 drivers/s390/net/qeth_core_main.c | 69 +++++++++++++------------------
 2 files changed, 30 insertions(+), 42 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index a1da83b0b0ef..91acff493612 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -436,7 +436,7 @@ struct qeth_qdio_out_buffer {
 	int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER];
 
 	struct qeth_qdio_out_q *q;
-	struct qeth_qdio_out_buffer *next_pending;
+	struct list_head list_entry;
 };
 
 struct qeth_card;
@@ -500,6 +500,7 @@ struct qeth_qdio_out_q {
 	struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q];
 	struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
 	struct qdio_outbuf_state *bufstates; /* convenience pointer */
+	struct list_head pending_bufs;
 	struct qeth_out_q_stats stats;
 	spinlock_t lock;
 	unsigned int priority;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index f7bc0ca6909b..3763cd6d14f8 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -73,8 +73,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card);
 static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
 		struct qeth_qdio_out_buffer *buf,
 		enum iucv_tx_notify notification);
-static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error,
-				 int budget);
 
 static void qeth_close_dev_handler(struct work_struct *work)
 {
@@ -465,41 +463,6 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
 	return n;
 }
 
-static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx,
-					 int forced_cleanup)
-{
-	if (q->card->options.cq != QETH_CQ_ENABLED)
-		return;
-
-	if (q->bufs[bidx]->next_pending != NULL) {
-		struct qeth_qdio_out_buffer *head = q->bufs[bidx];
-		struct qeth_qdio_out_buffer *c = q->bufs[bidx]->next_pending;
-
-		while (c) {
-			if (forced_cleanup ||
-			    atomic_read(&c->state) == QETH_QDIO_BUF_EMPTY) {
-				struct qeth_qdio_out_buffer *f = c;
-
-				QETH_CARD_TEXT(f->q->card, 5, "fp");
-				QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f);
-				/* release here to avoid interleaving between
-				   outbound tasklet and inbound tasklet
-				   regarding notifications and lifecycle */
-				qeth_tx_complete_buf(c, forced_cleanup, 0);
-
-				c = f->next_pending;
-				WARN_ON_ONCE(head->next_pending != f);
-				head->next_pending = c;
-				kmem_cache_free(qeth_qdio_outbuf_cache, f);
-			} else {
-				head = c;
-				c = c->next_pending;
-			}
-
-		}
-	}
-}
-
 static void qeth_qdio_handle_aob(struct qeth_card *card,
 				 unsigned long phys_aob_addr)
 {
@@ -537,7 +500,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 		qeth_notify_skbs(buffer->q, buffer, notification);
 
 		/* Free dangling allocations. The attached skbs are handled by
-		 * qeth_cleanup_handled_pending().
+		 * qeth_tx_complete_pending_bufs().
 		 */
 		for (i = 0;
 		     i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card);
@@ -1488,14 +1451,35 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 	atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
 }
 
+static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
+					  struct qeth_qdio_out_q *queue,
+					  bool drain)
+{
+	struct qeth_qdio_out_buffer *buf, *tmp;
+
+	list_for_each_entry_safe(buf, tmp, &queue->pending_bufs, list_entry) {
+		if (drain || atomic_read(&buf->state) == QETH_QDIO_BUF_EMPTY) {
+			QETH_CARD_TEXT(card, 5, "fp");
+			QETH_CARD_TEXT_(card, 5, "%lx", (long) buf);
+
+			qeth_tx_complete_buf(buf, drain, 0);
+
+			list_del(&buf->list_entry);
+			kmem_cache_free(qeth_qdio_outbuf_cache, buf);
+		}
+	}
+}
+
 static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free)
 {
 	int j;
 
+	qeth_tx_complete_pending_bufs(q->card, q, true);
+
 	for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
 		if (!q->bufs[j])
 			continue;
-		qeth_cleanup_handled_pending(q, j, 1);
+
 		qeth_clear_output_buffer(q, q->bufs[j], true, 0);
 		if (free) {
 			kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]);
@@ -2615,7 +2599,6 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
 	skb_queue_head_init(&newbuf->skb_list);
 	lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
 	newbuf->q = q;
-	newbuf->next_pending = q->bufs[bidx];
 	atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
 	q->bufs[bidx] = newbuf;
 	return 0;
@@ -2697,6 +2680,7 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
 		card->qdio.out_qs[i] = queue;
 		queue->card = card;
 		queue->queue_no = i;
+		INIT_LIST_HEAD(&queue->pending_bufs);
 		spin_lock_init(&queue->lock);
 		timer_setup(&queue->timer, qeth_tx_completion_timer, 0);
 		queue->coalesce_usecs = QETH_TX_COALESCE_USECS;
@@ -6106,6 +6090,8 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 					qeth_schedule_recovery(card);
 				}
 
+				list_add(&buffer->list_entry,
+					 &queue->pending_bufs);
 				/* Skip clearing the buffer: */
 				return;
 			case QETH_QDIO_BUF_QAOB_OK:
@@ -6161,6 +6147,8 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 		unsigned int bytes = 0;
 		int completed;
 
+		qeth_tx_complete_pending_bufs(card, queue, false);
+
 		if (qeth_out_queue_is_empty(queue)) {
 			napi_complete(napi);
 			return 0;
@@ -6193,7 +6181,6 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 
 			qeth_handle_send_error(card, buffer, error);
 			qeth_iqd_tx_complete(queue, bidx, error, budget);
-			qeth_cleanup_handled_pending(queue, bidx, false);
 		}
 
 		netdev_tx_completed_queue(txq, packets, bytes);

From 3e83d467a08e25b27c44c885f511624a71c84f7c Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 9 Mar 2021 17:52:20 +0100
Subject: [PATCH 163/164] s390/qeth: schedule TX NAPI on QAOB completion

When a QAOB notifies us that a pending TX buffer has been delivered, the
actual TX completion processing by qeth_tx_complete_pending_bufs()
is done within the context of a TX NAPI instance. We shouldn't rely on
this instance being scheduled by some other TX event, but just do it
ourselves.

qeth_qdio_handle_aob() is called from qeth_poll(), ie. our main NAPI
instance. To avoid touching the TX queue's NAPI instance
before/after it is (un-)registered, reorder the code in qeth_open()
and qeth_stop() accordingly.

Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3763cd6d14f8..d0a56afec028 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -470,6 +470,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 	struct qaob *aob;
 	struct qeth_qdio_out_buffer *buffer;
 	enum iucv_tx_notify notification;
+	struct qeth_qdio_out_q *queue;
 	unsigned int i;
 
 	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
@@ -512,7 +513,9 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 			buffer->is_header[i] = 0;
 		}
 
+		queue = buffer->q;
 		atomic_set(&buffer->state, QETH_QDIO_BUF_EMPTY);
+		napi_schedule(&queue->napi);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -7235,9 +7238,7 @@ int qeth_open(struct net_device *dev)
 	card->data.state = CH_STATE_UP;
 	netif_tx_start_all_queues(dev);
 
-	napi_enable(&card->napi);
 	local_bh_disable();
-	napi_schedule(&card->napi);
 	if (IS_IQD(card)) {
 		struct qeth_qdio_out_q *queue;
 		unsigned int i;
@@ -7249,8 +7250,12 @@ int qeth_open(struct net_device *dev)
 			napi_schedule(&queue->napi);
 		}
 	}
+
+	napi_enable(&card->napi);
+	napi_schedule(&card->napi);
 	/* kick-start the NAPI softirq: */
 	local_bh_enable();
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(qeth_open);
@@ -7260,6 +7265,11 @@ int qeth_stop(struct net_device *dev)
 	struct qeth_card *card = dev->ml_priv;
 
 	QETH_CARD_TEXT(card, 4, "qethstop");
+
+	napi_disable(&card->napi);
+	cancel_delayed_work_sync(&card->buffer_reclaim_work);
+	qdio_stop_irq(CARD_DDEV(card));
+
 	if (IS_IQD(card)) {
 		struct qeth_qdio_out_q *queue;
 		unsigned int i;
@@ -7280,10 +7290,6 @@ int qeth_stop(struct net_device *dev)
 		netif_tx_disable(dev);
 	}
 
-	napi_disable(&card->napi);
-	cancel_delayed_work_sync(&card->buffer_reclaim_work);
-	qdio_stop_irq(CARD_DDEV(card));
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(qeth_stop);

From 7eefda7f353ef86ad82a2dc8329e8a3538c08ab6 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 9 Mar 2021 17:52:21 +0100
Subject: [PATCH 164/164] s390/qeth: fix notification for pending buffers
 during teardown

The cited commit reworked the state machine for pending TX buffers.
In qeth_iqd_tx_complete() it turned PENDING into a transient state, and
uses NEED_QAOB for buffers that get parked while waiting for their QAOB
completion.

But it missed to adjust the check in qeth_tx_complete_buf(). So if
qeth_tx_complete_pending_bufs() is called during teardown to drain
the parked TX buffers, we no longer raise a notification for af_iucv.

Instead of updating the checked state, just move this code into
qeth_tx_complete_pending_bufs() itself. This also gets rid of the
special-case in the common TX completion path.

Fixes: 8908f36d20d8 ("s390/qeth: fix af_iucv notification race")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index d0a56afec028..a814698387bc 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1390,9 +1390,6 @@ static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error,
 	struct qeth_qdio_out_q *queue = buf->q;
 	struct sk_buff *skb;
 
-	if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING)
-		qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR);
-
 	/* Empty buffer? */
 	if (buf->next_element_to_fill == 0)
 		return;
@@ -1465,6 +1462,9 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 			QETH_CARD_TEXT(card, 5, "fp");
 			QETH_CARD_TEXT_(card, 5, "%lx", (long) buf);
 
+			if (drain)
+				qeth_notify_skbs(queue, buf,
+						 TX_NOTIFY_GENERALERROR);
 			qeth_tx_complete_buf(buf, drain, 0);
 
 			list_del(&buf->list_entry);