From 99b5382bffd59c6957b7b8f773a7ed27ad68991a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 25 Nov 2020 11:16:19 +0200 Subject: [PATCH 01/40] devlink: Hold rtnl lock while reading netdev attributes [ Upstream commit b187c9b4178b87954dbc94e78a7094715794714f ] A netdevice of a devlink port can be moved to different net namespace than its parent devlink instance. This scenario occurs when devlink reload is not used. When netdevice is undergoing migration to net namespace, its ifindex and name may change. In such use case, devlink port query may read stale netdev attributes. Fix it by reading them under rtnl lock. Fixes: bfcd3a466172 ("Introduce devlink infrastructure") Signed-off-by: Parav Pandit Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/devlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/devlink.c b/net/core/devlink.c index 79f54ae71422..0ac02cab3087 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -562,6 +562,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; + /* Hold rtnl lock while accessing port's netdev attributes. */ + rtnl_lock(); spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; @@ -588,6 +590,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure_type_locked; } spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; @@ -596,6 +599,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, nla_put_failure_type_locked: spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; From 9414855a1305cac3a0208bd4849a74ae23676bd4 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 24 Nov 2020 15:17:28 +0800 Subject: [PATCH 02/40] ipv6: addrlabel: fix possible memory leak in ip6addrlbl_net_init [ Upstream commit e255e11e66da8281e337e4e352956e8a4999fca4 ] kmemleak report a memory leak as follows: unreferenced object 0xffff8880059c6a00 (size 64): comm "ip", pid 23696, jiffies 4296590183 (age 1755.384s) hex dump (first 32 bytes): 20 01 00 10 00 00 00 00 00 00 00 00 00 00 00 00 ............... 1c 00 00 00 00 00 00 00 00 00 00 00 07 00 00 00 ................ backtrace: [<00000000aa4e7a87>] ip6addrlbl_add+0x90/0xbb0 [<0000000070b8d7f1>] ip6addrlbl_net_init+0x109/0x170 [<000000006a9ca9d4>] ops_init+0xa8/0x3c0 [<000000002da57bf2>] setup_net+0x2de/0x7e0 [<000000004e52d573>] copy_net_ns+0x27d/0x530 [<00000000b07ae2b4>] create_new_namespaces+0x382/0xa30 [<000000003b76d36f>] unshare_nsproxy_namespaces+0xa1/0x1d0 [<0000000030653721>] ksys_unshare+0x3a4/0x780 [<0000000007e82e40>] __x64_sys_unshare+0x2d/0x40 [<0000000031a10c08>] do_syscall_64+0x33/0x40 [<0000000099df30e7>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 We should free all rules when we catch an error in ip6addrlbl_net_init(). otherwise a memory leak will occur. Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201124071728.8385-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrlabel.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 642fc6ac13d2..8a22486cf270 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net, /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { - int err = 0; + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *n; + int err; int i; ADDRLABEL(KERN_DEBUG "%s\n", __func__); @@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net) INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(net, - ip6addrlbl_init_table[i].prefix, - ip6addrlbl_init_table[i].prefixlen, - 0, - ip6addrlbl_init_table[i].label, 0); - /* XXX: should we free all rules when we catch an error? */ - if (ret && (!err || err != -ENOMEM)) - err = ret; + err = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + if (err) + goto err_ip6addrlbl_add; + } + return 0; + +err_ip6addrlbl_add: + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } return err; } From a15beea80e72daa9836ac47c383bf5974031bba4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Nov 2020 11:06:57 +0100 Subject: [PATCH 03/40] net/af_iucv: set correct sk_protocol for child sockets [ Upstream commit c5dab0941fcdc9664eb0ec0d4d51433216d91336 ] Child sockets erroneously inherit their parent's sk_type (ie. SOCK_*), instead of the PF_IUCV protocol that the parent was created with in iucv_sock_create(). We're currently not using sk->sk_protocol ourselves, so this shouldn't have much impact (except eg. getting the output in skb_dump() right). Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support") Signed-off-by: Julian Wiedmann Link: https://lore.kernel.org/r/20201120100657.34407-1-jwi@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/iucv/af_iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index be8fd79202b8..fdced0a7bd77 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1785,7 +1785,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1991,7 +1991,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || From a6300aedf8626f87745a081d7512148e2a36b5bc Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 19 Nov 2020 18:59:48 +0300 Subject: [PATCH 04/40] net/tls: missing received data after fast remote close [ Upstream commit 20ffc7adf53a5fd3d19751fbff7895bcca66686e ] In case when tcp socket received FIN after some data and the parser haven't started before reading data caller will receive an empty buffer. This behavior differs from plain TCP socket and leads to special treating in user-space. The flow that triggers the race is simple. Server sends small amount of data right after the connection is configured to use TLS and closes the connection. In this case receiver sees TLS Handshake data, configures TLS socket right after Change Cipher Spec record. While the configuration is in process, TCP socket receives small Application Data record, Encrypted Alert record and FIN packet. So the TCP socket changes sk_shutdown to RCV_SHUTDOWN and sk_flag with SK_DONE bit set. The received data is not parsed upon arrival and is never sent to user-space. Patch unpauses parser directly if we have unparsed data in tcp receive queue. Fixes: fcf4793e278e ("tls: check RCV_SHUTDOWN in tls_wait_data") Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1605801588-12236-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 57032b7ad023..0d524ef0d8c8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1291,6 +1291,12 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return NULL; } + if (!skb_queue_empty(&sk->sk_receive_queue)) { + __strp_unpause(&ctx->strp); + if (ctx->recv_pkt) + return ctx->recv_pkt; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) return NULL; From f2f25bc797823b9293845fdab35094ec676d775b Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 25 Nov 2020 14:18:10 -0800 Subject: [PATCH 05/40] net/tls: Protect from calling tls_dev_del for TLS RX twice [ Upstream commit 025cc2fb6a4e84e9a0552c0017dcd1c24b7ac7da ] tls_device_offload_cleanup_rx doesn't clear tls_ctx->netdev after calling tls_dev_del if TLX TX offload is also enabled. Clearing tls_ctx->netdev gets postponed until tls_device_gc_task. It leaves a time frame when tls_device_down may get called and call tls_dev_del for RX one extra time, confusing the driver, which may lead to a crash. This patch corrects this racy behavior by adding a flag to prevent tls_device_down from calling tls_dev_del the second time. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20201125221810.69870-1-saeedm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 6 ++++++ net/tls/tls_device.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index 0a065bdffa39..697df45c0bce 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -221,6 +221,12 @@ enum tls_context_flags { * to be atomic. */ TLS_TX_SYNC_SCHED = 1, + /* tls_dev_del was called for the RX side, device state was released, + * but tls_ctx->netdev might still be kept, because TX-side driver + * resources might not be released yet. Used to prevent the second + * tls_dev_del call in tls_device_down if it happens simultaneously. + */ + TLS_RX_DEV_CLOSED = 2, }; struct cipher_context { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 933a3187d3bf..0f034c3bc37d 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1163,6 +1163,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk) if (tls_ctx->tx_conf != TLS_HW) { dev_put(netdev); tls_ctx->netdev = NULL; + } else { + set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); } out: up_read(&device_offload_lock); @@ -1192,7 +1194,8 @@ static int tls_device_down(struct net_device *netdev) if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); - if (ctx->rx_conf == TLS_HW) + if (ctx->rx_conf == TLS_HW && + !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); WRITE_ONCE(ctx->netdev, NULL); From 7f0ddd41e2899349461b578bec18e8bd492e1765 Mon Sep 17 00:00:00 2001 From: Anmol Karn Date: Fri, 20 Nov 2020 00:40:43 +0530 Subject: [PATCH 06/40] rose: Fix Null pointer dereference in rose_send_frame() [ Upstream commit 3b3fd068c56e3fbea30090859216a368398e39bf ] rose_send_frame() dereferences `neigh->dev` when called from rose_transmit_clear_request(), and the first occurrence of the `neigh` is in rose_loopback_timer() as `rose_loopback_neigh`, and it is initialized in rose_add_loopback_neigh() as NULL. i.e when `rose_loopback_neigh` used in rose_loopback_timer() its `->dev` was still NULL and rose_loopback_timer() was calling rose_rx_call_request() without checking for NULL. - net/rose/rose_link.c This bug seems to get triggered in this line: rose_call = (ax25_address *)neigh->dev->dev_addr; Fix it by adding NULL checking for `rose_loopback_neigh->dev` in rose_loopback_timer(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Jakub Kicinski Reported-by: syzbot+a1c743815982d9496393@syzkaller.appspotmail.com Tested-by: syzbot+a1c743815982d9496393@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=9d2a7ca8c7f2e4b682c97578dfa3f236258300b3 Signed-off-by: Anmol Karn Link: https://lore.kernel.org/r/20201119191043.28813-1-anmol.karan123@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_loopback.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7b094275ea8b..11c45c8c6c16 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -96,10 +96,19 @@ static void rose_loopback_timer(struct timer_list *unused) } if (frametype == ROSE_CALL_REQUEST) { - if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) - kfree_skb(skb); - } else { + if (!rose_loopback_neigh->dev) { + kfree_skb(skb); + continue; + } + + dev = rose_dev_get(dest); + if (!dev) { + kfree_skb(skb); + continue; + } + + if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) { + dev_put(dev); kfree_skb(skb); } } else { From 9a62c8229cffddeb20feb6fdebaec033280f9143 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 26 Nov 2020 10:12:20 -0500 Subject: [PATCH 07/40] sock: set sk_err to ee_errno on dequeue from errq [ Upstream commit 985f7337421a811cb354ca93882f943c8335a6f5 ] When setting sk_err, set it to ee_errno, not ee_origin. Commit f5f99309fa74 ("sock: do not set sk_err in sock_dequeue_err_skb") disabled updating sk_err on errq dequeue, which is correct for most error types (origins): - sk->sk_err = err; Commit 38b257938ac6 ("sock: reset sk_err when the error queue is empty") reenabled the behavior for IMCP origins, which do require it: + if (icmp_next) + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; But read from ee_errno. Fixes: 38b257938ac6 ("sock: reset sk_err when the error queue is empty") Reported-by: Ayush Ranjan Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Link: https://lore.kernel.org/r/20201126151220.2819322-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 466d6273da9f..5fd6633bfa5b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4452,7 +4452,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) - sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); From 538008749df206cfdd6064932fb26ed2dab099a6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Nov 2020 13:23:58 -0800 Subject: [PATCH 08/40] tcp: Set INET_ECN_xmit configuration in tcp_reinit_congestion_control [ Upstream commit 55472017a4219ca965a957584affdb17549ae4a4 ] When setting congestion control via a BPF program it is seen that the SYN/ACK for packets within a given flow will not include the ECT0 flag. A bit of simple printk debugging shows that when this is configured without BPF we will see the value INET_ECN_xmit value initialized in tcp_assign_congestion_control however when we configure this via BPF the socket is in the closed state and as such it isn't configured, and I do not see it being initialized when we transition the socket into the listen state. The result of this is that the ECT0 bit is configured based on whatever the default state is for the socket. Any easy way to reproduce this is to monitor the following with tcpdump: tools/testing/selftests/bpf/test_progs -t bpf_tcp_ca Without this patch the SYN/ACK will follow whatever the default is. If dctcp all SYN/ACK packets will have the ECT0 bit set, and if it is not then ECT0 will be cleared on all SYN/ACK packets. With this patch applied the SYN/ACK bit matches the value seen on the other packets in the given stream. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Alexander Duyck Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cong.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index d7a1f2ef6c52..62292eef151c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -197,6 +197,11 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } From f057c4d226f19af95fa03507dc8780741bd43e48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Nov 2020 07:59:54 -0700 Subject: [PATCH 09/40] tun: honor IOCB_NOWAIT flag [ Upstream commit 5aac0390a63b8718237a61dd0d24a29201d1c94a ] tun only checks the file O_NONBLOCK flag, but it should also be checking the iocb IOCB_NOWAIT flag. Any fops using ->read/write_iter() should check both, otherwise it breaks users that correctly expect O_NONBLOCK semantics if IOCB_NOWAIT is set. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/e9451860-96cc-c7c7-47b8-fe42cadd5f4c@kernel.dk Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 46bdd0df2eb8..e72d27399983 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2028,12 +2028,15 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); ssize_t result; + int noblock = 0; if (!tun) return -EBADFD; - result = tun_get_user(tun, tfile, NULL, from, - file->f_flags & O_NONBLOCK, false); + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; + + result = tun_get_user(tun, tfile, NULL, from, noblock, false); tun_put(tun); return result; @@ -2254,10 +2257,15 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); ssize_t len = iov_iter_count(to), ret; + int noblock = 0; if (!tun) return -EBADFD; - ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL); + + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; + + ret = tun_do_read(tun, tfile, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; From beead010654d0e0432166fccec290e10273b8c46 Mon Sep 17 00:00:00 2001 From: Yves-Alexis Perez Date: Thu, 19 Nov 2020 18:24:39 +0100 Subject: [PATCH 10/40] usbnet: ipheth: fix connectivity with iOS 14 [ Upstream commit f33d9e2b48a34e1558b67a473a1fc1d6e793f93c ] Starting with iOS 14 released in September 2020, connectivity using the personal hotspot USB tethering function of iOS devices is broken. Communication between the host and the device (for example ICMP traffic or DNS resolution using the DNS service running in the device itself) works fine, but communication to endpoints further away doesn't work. Investigation on the matter shows that no UDP and ICMP traffic from the tethered host is reaching the Internet at all. For TCP traffic there are exchanges between tethered host and server but packets are modified in transit leading to impossible communication. After some trials Matti Vuorela discovered that reducing the URB buffer size by two bytes restored the previous behavior. While a better solution might exist to fix the issue, since the protocol is not publicly documented and considering the small size of the fix, let's do that. Tested-by: Matti Vuorela Signed-off-by: Yves-Alexis Perez Link: https://lore.kernel.org/linux-usb/CAAn0qaXmysJ9vx3ZEMkViv_B19ju-_ExN8Yn_uSefxpjS6g4Lw@mail.gmail.com/ Link: https://github.com/libimobiledevice/libimobiledevice/issues/1038 Link: https://lore.kernel.org/r/20201119172439.94988-1-corsac@corsac.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ipheth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 8c01fbf68a89..345576f1a747 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -59,7 +59,7 @@ #define IPHETH_USBINTF_SUBCLASS 253 #define IPHETH_USBINTF_PROTO 1 -#define IPHETH_BUF_SIZE 1516 +#define IPHETH_BUF_SIZE 1514 #define IPHETH_IP_ALIGN 2 /* padding at front of URB */ #define IPHETH_TX_TIMEOUT (5 * HZ) From 6dd37fdc95503650eb5d2f0458103e32ac36f85d Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 20 Nov 2020 14:28:27 +0000 Subject: [PATCH 11/40] bonding: wait for sysfs kobject destruction before freeing struct slave [ Upstream commit b9ad3e9f5a7a760ab068e33e1f18d240ba32ce92 ] syzkaller found that with CONFIG_DEBUG_KOBJECT_RELEASE=y, releasing a struct slave device could result in the following splat: kobject: 'bonding_slave' (00000000cecdd4fe): kobject_release, parent 0000000074ceb2b2 (delayed 1000) bond0 (unregistering): (slave bond_slave_1): Releasing backup interface ------------[ cut here ]------------ ODEBUG: free active (active state 0) object type: timer_list hint: workqueue_select_cpu_near kernel/workqueue.c:1549 [inline] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x98 kernel/workqueue.c:1600 WARNING: CPU: 1 PID: 842 at lib/debugobjects.c:485 debug_print_object+0x180/0x240 lib/debugobjects.c:485 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 842 Comm: kworker/u4:4 Tainted: G S 5.9.0-rc8+ #96 Hardware name: linux,dummy-virt (DT) Workqueue: netns cleanup_net Call trace: dump_backtrace+0x0/0x4d8 include/linux/bitmap.h:239 show_stack+0x34/0x48 arch/arm64/kernel/traps.c:142 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x174/0x1f8 lib/dump_stack.c:118 panic+0x360/0x7a0 kernel/panic.c:231 __warn+0x244/0x2ec kernel/panic.c:600 report_bug+0x240/0x398 lib/bug.c:198 bug_handler+0x50/0xc0 arch/arm64/kernel/traps.c:974 call_break_hook+0x160/0x1d8 arch/arm64/kernel/debug-monitors.c:322 brk_handler+0x30/0xc0 arch/arm64/kernel/debug-monitors.c:329 do_debug_exception+0x184/0x340 arch/arm64/mm/fault.c:864 el1_dbg+0x48/0xb0 arch/arm64/kernel/entry-common.c:65 el1_sync_handler+0x170/0x1c8 arch/arm64/kernel/entry-common.c:93 el1_sync+0x80/0x100 arch/arm64/kernel/entry.S:594 debug_print_object+0x180/0x240 lib/debugobjects.c:485 __debug_check_no_obj_freed lib/debugobjects.c:967 [inline] debug_check_no_obj_freed+0x200/0x430 lib/debugobjects.c:998 slab_free_hook mm/slub.c:1536 [inline] slab_free_freelist_hook+0x190/0x210 mm/slub.c:1577 slab_free mm/slub.c:3138 [inline] kfree+0x13c/0x460 mm/slub.c:4119 bond_free_slave+0x8c/0xf8 drivers/net/bonding/bond_main.c:1492 __bond_release_one+0xe0c/0xec8 drivers/net/bonding/bond_main.c:2190 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3309 [inline] bond_netdev_event+0x8f0/0xa70 drivers/net/bonding/bond_main.c:3420 notifier_call_chain+0xf0/0x200 kernel/notifier.c:83 __raw_notifier_call_chain kernel/notifier.c:361 [inline] raw_notifier_call_chain+0x44/0x58 kernel/notifier.c:368 call_netdevice_notifiers_info+0xbc/0x150 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] rollback_registered_many+0x6a4/0xec0 net/core/dev.c:9347 unregister_netdevice_many.part.0+0x2c/0x1c0 net/core/dev.c:10509 unregister_netdevice_many net/core/dev.c:10508 [inline] default_device_exit_batch+0x294/0x338 net/core/dev.c:10992 ops_exit_list.isra.0+0xec/0x150 net/core/net_namespace.c:189 cleanup_net+0x44c/0x888 net/core/net_namespace.c:603 process_one_work+0x96c/0x18c0 kernel/workqueue.c:2269 worker_thread+0x3f0/0xc30 kernel/workqueue.c:2415 kthread+0x390/0x498 kernel/kthread.c:292 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:925 This is a potential use-after-free if the sysfs nodes are being accessed whilst removing the struct slave, so wait for the object destruction to complete before freeing the struct slave itself. Fixes: 07699f9a7c8d ("bonding: add sysfs /slave dir for bond slave devices.") Fixes: a068aab42258 ("bonding: Fix reference count leak in bond_sysfs_slave_add.") Cc: Qiushi Wu Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jamie Iles Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201120142827.879226-1-jamie@nuviainc.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 79 +++++++++++++++++--------- drivers/net/bonding/bond_sysfs_slave.c | 18 +----- include/net/bonding.h | 8 +++ 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6862c2ef2442..2bc4cb9e3095 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1293,29 +1293,9 @@ static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave) slave->dev->flags &= ~IFF_SLAVE; } -static struct slave *bond_alloc_slave(struct bonding *bond) -{ - struct slave *slave = NULL; - - slave = kzalloc(sizeof(*slave), GFP_KERNEL); - if (!slave) - return NULL; - - if (BOND_MODE(bond) == BOND_MODE_8023AD) { - SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info), - GFP_KERNEL); - if (!SLAVE_AD_INFO(slave)) { - kfree(slave); - return NULL; - } - } - INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); - - return slave; -} - -static void bond_free_slave(struct slave *slave) +static void slave_kobj_release(struct kobject *kobj) { + struct slave *slave = to_slave(kobj); struct bonding *bond = bond_get_bond_by_slave(slave); cancel_delayed_work_sync(&slave->notify_work); @@ -1325,6 +1305,53 @@ static void bond_free_slave(struct slave *slave) kfree(slave); } +static struct kobj_type slave_ktype = { + .release = slave_kobj_release, +#ifdef CONFIG_SYSFS + .sysfs_ops = &slave_sysfs_ops, +#endif +}; + +static int bond_kobj_init(struct slave *slave) +{ + int err; + + err = kobject_init_and_add(&slave->kobj, &slave_ktype, + &(slave->dev->dev.kobj), "bonding_slave"); + if (err) + kobject_put(&slave->kobj); + + return err; +} + +static struct slave *bond_alloc_slave(struct bonding *bond, + struct net_device *slave_dev) +{ + struct slave *slave = NULL; + + slave = kzalloc(sizeof(*slave), GFP_KERNEL); + if (!slave) + return NULL; + + slave->bond = bond; + slave->dev = slave_dev; + + if (bond_kobj_init(slave)) + return NULL; + + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info), + GFP_KERNEL); + if (!SLAVE_AD_INFO(slave)) { + kobject_put(&slave->kobj); + return NULL; + } + } + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); + + return slave; +} + static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info) { info->bond_mode = BOND_MODE(bond); @@ -1508,14 +1535,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_undo_flags; } - new_slave = bond_alloc_slave(bond); + new_slave = bond_alloc_slave(bond, slave_dev); if (!new_slave) { res = -ENOMEM; goto err_undo_flags; } - new_slave->bond = bond; - new_slave->dev = slave_dev; /* Set the new_slave's queue_id to be zero. Queue ID mapping * is set via sysfs or module option if desired. */ @@ -1837,7 +1862,7 @@ err_restore_mtu: dev_set_mtu(slave_dev, new_slave->original_mtu); err_free: - bond_free_slave(new_slave); + kobject_put(&new_slave->kobj); err_undo_flags: /* Enslave of first slave has failed and we need to fix master's mac */ @@ -2017,7 +2042,7 @@ static int __bond_release_one(struct net_device *bond_dev, if (!netif_is_bond_master(slave_dev)) slave_dev->priv_flags &= ~IFF_BONDING; - bond_free_slave(slave); + kobject_put(&slave->kobj); return 0; } diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 9b8346638f69..fd07561da034 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -121,7 +121,6 @@ static const struct slave_attribute *slave_attrs[] = { }; #define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) -#define to_slave(obj) container_of(obj, struct slave, kobj) static ssize_t slave_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -132,28 +131,15 @@ static ssize_t slave_show(struct kobject *kobj, return slave_attr->show(slave, buf); } -static const struct sysfs_ops slave_sysfs_ops = { +const struct sysfs_ops slave_sysfs_ops = { .show = slave_show, }; -static struct kobj_type slave_ktype = { -#ifdef CONFIG_SYSFS - .sysfs_ops = &slave_sysfs_ops, -#endif -}; - int bond_sysfs_slave_add(struct slave *slave) { const struct slave_attribute **a; int err; - err = kobject_init_and_add(&slave->kobj, &slave_ktype, - &(slave->dev->dev.kobj), "bonding_slave"); - if (err) { - kobject_put(&slave->kobj); - return err; - } - for (a = slave_attrs; *a; ++a) { err = sysfs_create_file(&slave->kobj, &((*a)->attr)); if (err) { @@ -171,6 +157,4 @@ void bond_sysfs_slave_del(struct slave *slave) for (a = slave_attrs; *a; ++a) sysfs_remove_file(&slave->kobj, &((*a)->attr)); - - kobject_put(&slave->kobj); } diff --git a/include/net/bonding.h b/include/net/bonding.h index 3d56b026bb9e..1bee8fdff7db 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -180,6 +180,11 @@ struct slave { struct rtnl_link_stats64 slave_stats; }; +static inline struct slave *to_slave(struct kobject *kobj) +{ + return container_of(kobj, struct slave, kobj); +} + struct bond_up_slave { unsigned int count; struct rcu_head rcu; @@ -743,6 +748,9 @@ extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; +/* exported from bond_sysfs_slave.c */ +extern const struct sysfs_ops slave_sysfs_ops; + static inline void bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { atomic_long_inc(&dev->tx_dropped); From 733729d3e0e44d5706ca886cea7d8f0733d3ced7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Mar 2020 09:12:31 -0700 Subject: [PATCH 12/40] staging/octeon: fix up merge error commit 673b41e04a035d760bc0aff83fa9ee24fd9c2779 upstream. There's a semantic conflict in the Octeon staging network driver, which used the skb_reset_tc() function to reset skb state when re-using an skb. But that inline helper function was removed in mainline by commit 2c64605b590e ("net: Fix CONFIG_NET_CLS_ACT=n and CONFIG_NFT_FWD_NETDEV={y, m} build"). Fix it by using skb_reset_redirect() instead. Also move it out of the This code path only ends up triggering if REUSE_SKBUFFS_WITHOUT_FREE is enabled, which in turn only happens if you don't have CONFIG_NETFILTER configured. Which was how this wasn't caught by the usual allmodconfig builds. Signed-off-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: Linus Torvalds Cc: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index 83469061a542..fe6e1ae73460 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -352,10 +352,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) skb_dst_set(skb, NULL); skb_ext_reset(skb); nf_reset_ct(skb); + skb_reset_redirect(skb); #ifdef CONFIG_NET_SCHED skb->tc_index = 0; - skb_reset_tc(skb); #endif /* CONFIG_NET_SCHED */ #endif /* REUSE_SKBUFFS_WITHOUT_FREE */ From c4405cdf96f4b98898f556069154b535aeefb965 Mon Sep 17 00:00:00 2001 From: Maurizio Drocco Date: Tue, 23 Jun 2020 11:57:32 -0400 Subject: [PATCH 13/40] ima: extend boot_aggregate with kernel measurements [ Upstream commit 20c59ce010f84300f6c655d32db2610d3433f85c ] Registers 8-9 are used to store measurements of the kernel and its command line (e.g., grub2 bootloader with tpm module enabled). IMA should include them in the boot aggregate. Registers 8-9 should be only included in non-SHA1 digests to avoid ambiguity. Signed-off-by: Maurizio Drocco Reviewed-by: Bruno Meneguele Tested-by: Bruno Meneguele (TPM 1.2, TPM 2.0) Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_crypto.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 8173982e00ab..5fae6cfe8d91 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -30,7 +30,7 @@ enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN, IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII }; -enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 }; +enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 }; /* digest size for IMA, fits SHA1 or MD5 */ #define IMA_DIGEST_SIZE SHA1_DIGEST_SIZE diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index d86825261b51..b06baf5d3cd3 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -682,7 +682,7 @@ static int ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id, if (rc != 0) return rc; - /* cumulative sha1 over tpm registers 0-7 */ + /* cumulative digest over TPM registers 0-7 */ for (i = TPM_PCR0; i < TPM_PCR8; i++) { ima_pcrread(i, &d); /* now accumulate with current aggregate */ @@ -691,6 +691,19 @@ static int ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id, if (rc != 0) return rc; } + /* + * Extend cumulative digest over TPM registers 8-9, which contain + * measurement for the kernel command line (reg. 8) and image (reg. 9) + * in a typical PCR allocation. Registers 8-9 are only included in + * non-SHA1 boot_aggregate digests to avoid ambiguity. + */ + if (alg_id != TPM_ALG_SHA1) { + for (i = TPM_PCR8; i < TPM_PCR10; i++) { + ima_pcrread(i, &d); + rc = crypto_shash_update(shash, d.digest, + crypto_shash_digestsize(tfm)); + } + } if (!rc) crypto_shash_final(shash, digest); return rc; From 294de8933adbdda78acaa3935971d26bb6de745e Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 2 Dec 2020 19:44:23 -0500 Subject: [PATCH 14/40] sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list [ Upstream commit 39f23ce07b9355d05a64ae303ce20d1c4b92b957 ] Although not exactly identical, unthrottle_cfs_rq() and enqueue_task_fair() are quite close and follow the same sequence for enqueuing an entity in the cfs hierarchy. Modify unthrottle_cfs_rq() to use the same pattern as enqueue_task_fair(). This fixes a problem already faced with the latter and add an optimization in the last for_each_sched_entity loop. Fixes: fe61468b2cb (sched/fair: Fix enqueue_task_fair warning) Reported-by Tao Zhou Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Phil Auld Reviewed-by: Ben Segall Link: https://lkml.kernel.org/r/20200513135528.4742-1-vincent.guittot@linaro.org Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 200e12110109..3dd7c10d6a58 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4580,7 +4580,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; - int enqueue = 1; long task_delta, idle_task_delta; se = cfs_rq->tg->se[cpu_of(rq)]; @@ -4604,21 +4603,41 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) idle_task_delta = cfs_rq->idle_h_nr_running; for_each_sched_entity(se) { if (se->on_rq) - enqueue = 0; - + break; cfs_rq = cfs_rq_of(se); - if (enqueue) - enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); + enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); + cfs_rq->h_nr_running += task_delta; cfs_rq->idle_h_nr_running += idle_task_delta; + /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) - break; + goto unthrottle_throttle; } - if (!se) - add_nr_running(rq, task_delta); + for_each_sched_entity(se) { + cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_running += task_delta; + cfs_rq->idle_h_nr_running += idle_task_delta; + + + /* end evaluation on encountering a throttled cfs_rq */ + if (cfs_rq_throttled(cfs_rq)) + goto unthrottle_throttle; + + /* + * One parent has been throttled and cfs_rq removed from the + * list. Add it back to not break the leaf list. + */ + if (throttled_hierarchy(cfs_rq)) + list_add_leaf_cfs_rq(cfs_rq); + } + + /* At this point se is NULL and we are at root level*/ + add_nr_running(rq, task_delta); + +unthrottle_throttle: /* * The cfs_rq_throttled() breaks in the above iteration can result in * incomplete leaf list maintenance, resulting in triggering the @@ -4627,7 +4646,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - list_add_leaf_cfs_rq(cfs_rq); + if (list_add_leaf_cfs_rq(cfs_rq)) + break; } assert_list_leaf_cfs_rq(rq); From 4615228a50f97781e3b328544af7911a2af6540f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 23 Nov 2020 18:49:02 +0100 Subject: [PATCH 15/40] netfilter: bridge: reset skb->pkt_type after NF_INET_POST_ROUTING traversal [ Upstream commit 44f64f23bae2f0fad25503bc7ab86cd08d04cd47 ] Netfilter changes PACKET_OTHERHOST to PACKET_HOST before invoking the hooks as, while it's an expected value for a bridge, routing expects PACKET_HOST. The change is undone later on after hook traversal. This can be seen with pairs of functions updating skb>pkt_type and then reverting it to its original value: For hook NF_INET_PRE_ROUTING: setup_pre_routing / br_nf_pre_routing_finish For hook NF_INET_FORWARD: br_nf_forward_ip / br_nf_forward_finish But the third case where netfilter does this, for hook NF_INET_POST_ROUTING, the packet type is changed in br_nf_post_routing but never reverted. A comment says: /* We assume any code from br_dev_queue_push_xmit onwards doesn't care * about the value of skb->pkt_type. */ But when having a tunnel (say vxlan) attached to a bridge we have the following call trace: br_nf_pre_routing br_nf_pre_routing_ipv6 br_nf_pre_routing_finish br_nf_forward_ip br_nf_forward_finish br_nf_post_routing <- pkt_type is updated to PACKET_HOST br_nf_dev_queue_xmit <- but not reverted to its original value vxlan_xmit vxlan_xmit_one skb_tunnel_check_pmtu <- a check on pkt_type is performed In this specific case, this creates issues such as when an ICMPv6 PTB should be sent back. When CONFIG_BRIDGE_NETFILTER is enabled, the PTB isn't sent (as skb_tunnel_check_pmtu checks if pkt_type is PACKET_HOST and returns early). If the comment is right and no one cares about the value of skb->pkt_type after br_dev_queue_push_xmit (which isn't true), resetting it to its original value should be safe. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Antoine Tenart Reviewed-by: Florian Westphal Link: https://lore.kernel.org/r/20201123174902.622102-1-atenart@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netfilter_hooks.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 59980ecfc962..2371b833b2bc 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -735,6 +735,11 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; @@ -835,8 +840,6 @@ static unsigned int br_nf_post_routing(void *priv, else return NF_ACCEPT; - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care - * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; From af0b082e16fbc6a0005b77e525b94ca6ee377d31 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 26 Nov 2020 19:09:22 +0100 Subject: [PATCH 16/40] ipv4: Fix tos mask in inet_rtm_getroute() [ Upstream commit 1ebf179037cb46c19da3a9c1e2ca16e7a754b75e ] When inet_rtm_getroute() was converted to use the RCU variants of ip_route_input() and ip_route_output_key(), the TOS parameters stopped being masked with IPTOS_RT_MASK before doing the route lookup. As a result, "ip route get" can return a different route than what would be used when sending real packets. For example: $ ip route add 192.0.2.11/32 dev eth0 $ ip route add unreachable 192.0.2.11/32 tos 2 $ ip route get 192.0.2.11 tos 2 RTNETLINK answers: No route to host But, packets with TOS 2 (ECT(0) if interpreted as an ECN bit) would actually be routed using the first route: $ ping -c 1 -Q 2 192.0.2.11 PING 192.0.2.11 (192.0.2.11) 56(84) bytes of data. 64 bytes from 192.0.2.11: icmp_seq=1 ttl=64 time=0.173 ms --- 192.0.2.11 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.173/0.173/0.173/0.000 ms This patch re-applies IPTOS_RT_MASK in inet_rtm_getroute(), to return results consistent with real route lookups. Fixes: 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu versions of route lookup") Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/b2d237d08317ca55926add9654a48409ac1b8f5b.1606412894.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a293d4968d1e..53c5cf5723aa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3132,7 +3132,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = rtm->rtm_tos; + fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; @@ -3156,8 +3156,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; - err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, - dev, &res); + err = ip_route_input_rcu(skb, dst, src, + rtm->rtm_tos & IPTOS_RT_MASK, dev, + &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) From 8a1bb298f75f6909fdf5548b767d0228ab100181 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 26 Oct 2020 16:36:20 +0100 Subject: [PATCH 17/40] dt-bindings: net: correct interrupt flags in examples [ Upstream commit 4d521943f76bd0d1e68ea5e02df7aadd30b2838a ] GPIO_ACTIVE_x flags are not correct in the context of interrupt flags. These are simple defines so they could be used in DTS but they will not have the same meaning: 1. GPIO_ACTIVE_HIGH = 0 = IRQ_TYPE_NONE 2. GPIO_ACTIVE_LOW = 1 = IRQ_TYPE_EDGE_RISING Correct the interrupt flags, assuming the author of the code wanted same logical behavior behind the name "ACTIVE_xxx", this is: ACTIVE_LOW => IRQ_TYPE_LEVEL_LOW ACTIVE_HIGH => IRQ_TYPE_LEVEL_HIGH Fixes: a1a8b4594f8d ("NFC: pn544: i2c: Add DTS Documentation") Fixes: 6be88670fc59 ("NFC: nxp-nci_i2c: Add I2C support to NXP NCI driver") Fixes: e3b329221567 ("dt-bindings: can: tcan4x5x: Update binding to use interrupt property") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Acked-by: Marc Kleine-Budde # for tcan4x5x.txt Link: https://lore.kernel.org/r/20201026153620.89268-1-krzk@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/can/tcan4x5x.txt | 2 +- Documentation/devicetree/bindings/net/nfc/nxp-nci.txt | 2 +- Documentation/devicetree/bindings/net/nfc/pn544.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 27e1b4cebfbd..9cb3560756d0 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -33,7 +33,7 @@ tcan4x5x: tcan4x5x@0 { spi-max-frequency = <10000000>; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; interrupt-parent = <&gpio1>; - interrupts = <14 GPIO_ACTIVE_LOW>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; diff --git a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt index cfaf88998918..9e4dc510a40a 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt +++ b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with NPC100 NFC controller on I2C2): clock-frequency = <100000>; interrupt-parent = <&gpio1>; - interrupts = <29 GPIO_ACTIVE_HIGH>; + interrupts = <29 IRQ_TYPE_LEVEL_HIGH>; enable-gpios = <&gpio0 30 GPIO_ACTIVE_HIGH>; firmware-gpios = <&gpio0 31 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/nfc/pn544.txt b/Documentation/devicetree/bindings/net/nfc/pn544.txt index 92f399ec22b8..2bd82562ce8e 100644 --- a/Documentation/devicetree/bindings/net/nfc/pn544.txt +++ b/Documentation/devicetree/bindings/net/nfc/pn544.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with PN544 on I2C2): clock-frequency = <400000>; interrupt-parent = <&gpio1>; - interrupts = <17 GPIO_ACTIVE_HIGH>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; From d126c30eb30dcc380ffa20267a95916ab82eff76 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 26 Nov 2020 03:19:14 +0530 Subject: [PATCH 18/40] chelsio/chtls: fix panic during unload reload chtls [ Upstream commit e3d5e971d2f83d8ddd4b91a50cea4517fb488383 ] there is kernel panic in inet_twsk_free() while chtls module unload when socket is in TIME_WAIT state because sk_prot_creator was not preserved on connection socket. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Udai Sharma Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201125214913.16938-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 385bd4dc6686..f81a5e35d8fd 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1077,6 +1077,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, sk_setup_caps(newsk, dst); ctx = tls_get_ctx(lsk); newsk->sk_destruct = ctx->sk_destruct; + newsk->sk_prot_creator = lsk->sk_prot_creator; csk->sk = newsk; csk->passive_reap_next = oreq; csk->tx_chan = cxgb4_port_chan(ndev); From 40caea31dd5629d8155ee7d06a07ba002e5c291d Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 1 Dec 2020 09:52:10 -0600 Subject: [PATCH 19/40] ibmvnic: Ensure that SCRQ entry reads are correctly ordered [ Upstream commit b71ec952234610b4f90ef17a2fdcb124d5320070 ] Ensure that received Subordinate Command-Response Queue (SCRQ) entries are properly read in order by the driver. These queues are used in the ibmvnic device to process RX buffer and TX completion descriptors. dma_rmb barriers have been added after checking for a pending descriptor to ensure the correct descriptor entry is checked and after reading the SCRQ descriptor to ensure the entire descriptor is read before processing. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index e53994ca3142..90a8c0bbad8d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2307,6 +2307,12 @@ restart_poll: if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num])) break; + /* The queue entry at the current index is peeked at above + * to determine that there is a valid descriptor awaiting + * processing. We want to be sure that the current slot + * holds a valid descriptor before reading its contents. + */ + dma_rmb(); next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]); rx_buff = (struct ibmvnic_rx_buff *)be64_to_cpu(next-> @@ -2988,6 +2994,13 @@ restart_loop: unsigned int pool = scrq->pool_index; int num_entries = 0; + /* The queue entry at the current index is peeked at above + * to determine that there is a valid descriptor awaiting + * processing. We want to be sure that the current slot + * holds a valid descriptor before reading its contents. + */ + dma_rmb(); + next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { if (next->tx_comp.rcs[i]) { @@ -3388,6 +3401,11 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, } spin_unlock_irqrestore(&scrq->lock, flags); + /* Ensure that the entire buffer descriptor has been + * loaded before reading its contents + */ + dma_rmb(); + return entry; } From 9a3cce1ceee42aa3046b8d33ac6c2b4dfc0e32c6 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 1 Dec 2020 09:52:11 -0600 Subject: [PATCH 20/40] ibmvnic: Fix TX completion error handling [ Upstream commit ba246c175116e2e8fa4fdfa5f8e958e086a9a818 ] TX completions received with an error return code are not being processed properly. When an error code is seen, do not proceed to the next completion before cleaning up the existing entry's data structures. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 90a8c0bbad8d..7056419461e7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3003,11 +3003,9 @@ restart_loop: next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) { + if (next->tx_comp.rcs[i]) dev_err(dev, "tx error %x\n", next->tx_comp.rcs[i]); - continue; - } index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; From 2b714b607f24cd1ffeb492d73449a21fcc584b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 30 Nov 2020 19:37:05 +0100 Subject: [PATCH 21/40] inet_ecn: Fix endianness of checksum update when setting ECT(1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2867e1eac61016f59b3d730e3f7aa488e186e917 ] When adding support for propagating ECT(1) marking in IP headers it seems I suffered from endianness-confusion in the checksum update calculation: In fact the ECN field is in the *lower* bits of the first 16-bit word of the IP header when calculating in network byte order. This means that the addition performed to update the checksum field was wrong; let's fix that. Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040") Reported-by: Jonathan Morton Tested-by: Pete Heist Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20201130183705.17540-1-toke@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/inet_ecn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index e1eaf1780288..563457fec557 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -107,7 +107,7 @@ static inline int IP_ECN_set_ect1(struct iphdr *iph) if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; - check += (__force u16)htons(0x100); + check += (__force u16)htons(0x1); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos ^= INET_ECN_MASK; From a6cd761328726da209eb95134de7185ad350633e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2020 01:05:07 -0800 Subject: [PATCH 22/40] geneve: pull IP header before ECN decapsulation [ Upstream commit 4179b00c04d18ea7013f68d578d80f3c9d13150a ] IP_ECN_decapsulate() and IP6_ECN_decapsulate() assume IP header is already pulled. geneve does not ensure this yet. Fixing this generically in IP_ECN_decapsulate() and IP6_ECN_decapsulate() is not possible, since callers pass a pointer that might be freed by pskb_may_pull() syzbot reported : BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:238 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate+0x345/0x1db0 include/net/inet_ecn.h:260 CPU: 1 PID: 8941 Comm: syz-executor.0 Not tainted 5.10.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x5f/0xa0 mm/kmsan/kmsan_instr.c:197 __INET_ECN_decapsulate include/net/inet_ecn.h:238 [inline] INET_ECN_decapsulate+0x345/0x1db0 include/net/inet_ecn.h:260 geneve_rx+0x2103/0x2980 include/net/inet_ecn.h:306 geneve_udp_encap_recv+0x105c/0x1340 drivers/net/geneve.c:377 udp_queue_rcv_one_skb+0x193a/0x1af0 net/ipv4/udp.c:2093 udp_queue_rcv_skb+0x282/0x1050 net/ipv4/udp.c:2167 udp_unicast_rcv_skb net/ipv4/udp.c:2325 [inline] __udp4_lib_rcv+0x399d/0x5880 net/ipv4/udp.c:2394 udp_rcv+0x5c/0x70 net/ipv4/udp.c:2564 ip_protocol_deliver_rcu+0x572/0xc50 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x583/0x8d0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:449 [inline] ip_rcv_finish net/ipv4/ip_input.c:428 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0x5c3/0x840 net/ipv4/ip_input.c:539 __netif_receive_skb_one_core net/core/dev.c:5315 [inline] __netif_receive_skb+0x1ec/0x640 net/core/dev.c:5429 process_backlog+0x523/0xc10 net/core/dev.c:6319 napi_poll+0x420/0x1010 net/core/dev.c:6763 net_rx_action+0x35c/0xd40 net/core/dev.c:6833 __do_softirq+0x1a9/0x6fa kernel/softirq.c:298 asm_call_irq_on_stack+0xf/0x20 __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline] run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline] do_softirq_own_stack+0x6e/0x90 arch/x86/kernel/irq_64.c:77 do_softirq kernel/softirq.c:343 [inline] __local_bh_enable_ip+0x184/0x1d0 kernel/softirq.c:195 local_bh_enable+0x36/0x40 include/linux/bottom_half.h:32 rcu_read_unlock_bh include/linux/rcupdate.h:730 [inline] __dev_queue_xmit+0x3a9b/0x4520 net/core/dev.c:4167 dev_queue_xmit+0x4b/0x60 net/core/dev.c:4173 packet_snd net/packet/af_packet.c:2992 [inline] packet_sendmsg+0x86f9/0x99d0 net/packet/af_packet.c:3017 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] __sys_sendto+0x9dc/0xc80 net/socket.c:1992 __do_sys_sendto net/socket.c:2004 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:2000 __x64_sys_sendto+0x6e/0x90 net/socket.c:2000 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20201201090507.4137906-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c7ec3d24eabc..496ae07aca5e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -254,11 +254,21 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb_dst_set(skb, &tun_dst->dst); /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { - geneve->dev->stats.rx_errors++; - goto drop; - } + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) + goto rx_error; + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): + if (pskb_may_pull(skb, sizeof(struct iphdr))) + goto rx_error; + break; + case htons(ETH_P_IPV6): + if (pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto rx_error; + break; + default: + goto rx_error; + } oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -299,6 +309,8 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, u64_stats_update_end(&stats->syncp); } return; +rx_error: + geneve->dev->stats.rx_errors++; drop: /* Consume bad packet */ kfree_skb(skb); From 187a6daf5db45adc807c85bbe285319107bed198 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 30 Nov 2020 17:19:11 +0100 Subject: [PATCH 23/40] net: ip6_gre: set dev->hard_header_len when using header_ops [ Upstream commit 832ba596494b2c9eac7760259eff2d8b7dcad0ee ] syzkaller managed to crash the kernel using an NBMA ip6gre interface. I could reproduce it creating an NBMA ip6gre interface and forwarding traffic to it: skbuff: skb_under_panic: text:ffffffff8250e927 len:148 put:44 head:ffff8c03c7a33 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:109! Call Trace: skb_push+0x10/0x10 ip6gre_header+0x47/0x1b0 neigh_connected_output+0xae/0xf0 ip6gre tunnel provides its own header_ops->create, and sets it conditionally when initializing the tunnel in NBMA mode. When header_ops->create is used, dev->hard_header_len should reflect the length of the header created. Otherwise, when not used, dev->needed_headroom should be used. Fixes: eb95f52fc72d ("net: ipv6_gre: Fix GRO to work on IPv6 over GRE tap") Cc: Maria Pasechnik Signed-off-by: Antoine Tenart Link: https://lore.kernel.org/r/20201130161911.464106-1-atenart@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 44876509d215..e4a43a8941c8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1120,8 +1120,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->needed_headroom = rt->dst.dev->hard_header_len + - t_hlen; + unsigned short dst_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (t->dev->header_ops) + dev->hard_header_len = dst_len; + else + dev->needed_headroom = dst_len; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; @@ -1146,7 +1151,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + + if (tunnel->dev->header_ops) + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + else + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + return t_hlen; } From 8bfe5b73b185d931b77c965002f84ad986aa94f1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Dec 2020 18:15:12 +0300 Subject: [PATCH 24/40] net/x25: prevent a couple of overflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6ee50c8e262a0f0693dad264c3c99e30e6442a56 ] The .x25_addr[] address comes from the user and is not necessarily NUL terminated. This leads to a couple problems. The first problem is that the strlen() in x25_bind() can read beyond the end of the buffer. The second problem is more subtle and could result in memory corruption. The call tree is: x25_connect() --> x25_write_internal() --> x25_addr_aton() The .x25_addr[] buffers are copied to the "addresses" buffer from x25_write_internal() so it will lead to stack corruption. Verify that the strings are NUL terminated and return -EINVAL if they are not. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: a9288525d2ae ("X25: Dont let x25_bind use addresses containing characters") Reported-by: "kiyin(尹亮)" Signed-off-by: Dan Carpenter Acked-by: Martin Schiller Link: https://lore.kernel.org/r/X8ZeAKm8FnFpN//B@mwanda Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 54351e5ba047..cb1f5016c433 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -675,7 +675,8 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int len, i, rc = 0; if (addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) { + addr->sx25_family != AF_X25 || + strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) { rc = -EINVAL; goto out; } @@ -769,7 +770,8 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, rc = -EINVAL; if (addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) + addr->sx25_family != AF_X25 || + strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) goto out; rc = -ENETUNREACH; From dc469f42365435ddc24d0649b442247f361b6837 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 2 Dec 2020 17:56:05 +0800 Subject: [PATCH 25/40] cxgb3: fix error return code in t3_sge_alloc_qset() [ Upstream commit ff9924897f8bfed82e61894b373ab9d2dfea5b10 ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: b1fb1f280d09 ("cxgb3 - Fix dma mapping error path") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Acked-by: Raju Rangoju Link: https://lore.kernel.org/r/1606902965-1646-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb3/sge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 6dabbf1502c7..c0e96bf5dd1a 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -3176,6 +3176,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, GFP_KERNEL | __GFP_COMP); if (!avail) { CH_ALERT(adapter, "free list queue 0 initialization failed\n"); + ret = -ENOMEM; goto err; } if (avail < q->fl[0].size) From 5405a299b8c1878499bd072998e8cfcab62ca913 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 2 Dec 2020 17:57:15 +0800 Subject: [PATCH 26/40] net: pasemi: fix error return code in pasemi_mac_open() [ Upstream commit aba84871bd4f52c4dfcf3ad5d4501a6c9d2de90e ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 72b05b9940f0 ("pasemi_mac: RX/TX ring management cleanup") Fixes: 8d636d8bc5ff ("pasemi_mac: jumbo frame support") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1606903035-1838-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/pasemi/pasemi_mac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index be6660128b55..040a15a828b4 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1078,16 +1078,20 @@ static int pasemi_mac_open(struct net_device *dev) mac->tx = pasemi_mac_setup_tx_resources(dev); - if (!mac->tx) + if (!mac->tx) { + ret = -ENOMEM; goto out_tx_ring; + } /* We might already have allocated rings in case mtu was changed * before interface was brought up. */ if (dev->mtu > 1500 && !mac->num_cs) { pasemi_mac_setup_csrings(mac); - if (!mac->num_cs) + if (!mac->num_cs) { + ret = -ENOMEM; goto out_tx_ring; + } } /* Zero out rmon counters */ From 178da08f9b5ba74a3bf5d8dc9eb66b111ceeb24a Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 2 Dec 2020 17:58:42 +0800 Subject: [PATCH 27/40] vxlan: fix error return code in __vxlan_dev_create() [ Upstream commit 832e09798c261cf58de3a68cfcc6556408c16a5a ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 0ce1822c2a08 ("vxlan: add adjacent link to limit depth level") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1606903122-2098-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f9edc76580d9..630ac00a34ed 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3617,8 +3617,10 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, if (dst->remote_ifindex) { remote_dev = __dev_get_by_index(net, dst->remote_ifindex); - if (!remote_dev) + if (!remote_dev) { + err = -ENODEV; goto errout; + } err = netdev_upper_dev_link(remote_dev, dev, extack); if (err) From 7c3894f695e487e13b2d599e3d3437ea6969c78d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Dec 2020 11:44:31 +0300 Subject: [PATCH 28/40] chelsio/chtls: fix a double free in chtls_setkey() [ Upstream commit 391119fb5c5c4bdb4d57c7ffeb5e8d18560783d1 ] The "skb" is freed by the transmit code in cxgb4_ofld_send() and we shouldn't use it again. But in the current code, if we hit an error later on in the function then the clean up code will call kfree_skb(skb) and so it causes a double free. Set the "skb" to NULL and that makes the kfree_skb() a no-op. Fixes: d25f2f71f653 ("crypto: chtls - Program the TLS session Key") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8ilb6PtBRLWiSHp@mwanda Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c index 3ef723e08953..753f4ba38f83 100644 --- a/drivers/crypto/chelsio/chtls/chtls_hw.c +++ b/drivers/crypto/chelsio/chtls/chtls_hw.c @@ -365,6 +365,7 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname) csk->wr_unacked += DIV_ROUND_UP(len, 16); enqueue_wr(csk, skb); cxgb4_ofld_send(csk->egress_dev, skb); + skb = NULL; chtls_set_scmd(csk); /* Clear quiesce for Rx key */ From 892e08e0b4f3dbacd490bb6552345c098740c204 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 3 Dec 2020 22:18:06 +0800 Subject: [PATCH 29/40] net: mvpp2: Fix error return code in mvpp2_open() [ Upstream commit 82a10dc7f0960735f40e8d7d3bee56934291600f ] Fix to return negative error code -ENOENT from invalid configuration error handling case instead of 0, as done elsewhere in this function. Fixes: 4bb043262878 ("net: mvpp2: phylink support") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201203141806.37966-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 0f136f1af5d1..63c033443013 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3696,6 +3696,7 @@ static int mvpp2_open(struct net_device *dev) if (!valid) { netdev_err(port->dev, "invalid configuration: no dt or link IRQ"); + err = -ENOENT; goto err_free_irq; } From ba203b92a829c8aaf8763c76034e21e687b1b9bf Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 10:58:21 +0100 Subject: [PATCH 30/40] net: skbuff: ensure LSE is pullable before decrementing the MPLS ttl [ Upstream commit 13de4ed9e3a9ccbe54d05f7d5c773f69ecaf6c64 ] skb_mpls_dec_ttl() reads the LSE without ensuring that it is contained in the skb "linear" area. Fix this calling pskb_may_pull() before reading the current ttl. Found by code inspection. Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reported-by: Marcelo Ricardo Leitner Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/53659f28be8bc336c113b5254dc637cc76bbae91.1606987074.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5fd6633bfa5b..a0486dcf5425 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5618,6 +5618,9 @@ int skb_mpls_dec_ttl(struct sk_buff *skb) if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) From 1086f789060ab968314b93ddbee91547f276b5d9 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 10:46:06 +0100 Subject: [PATCH 31/40] net: openvswitch: ensure LSE is pullable before reading it [ Upstream commit 43c13605bad44b8abbc9776d6e63f62ccb7a47d6 ] when openvswitch is configured to mangle the LSE, the current value is read from the packet dereferencing 4 bytes at mpls_hdr(): ensure that the label is contained in the skb "linear" area. Found by code inspection. Fixes: d27cf5c59a12 ("net: core: add MPLS update core helper and use in OvS") Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/aa099f245d93218b84b5c056b67b6058ccf81a66.1606987185.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/actions.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 3d96dab10449..425e146523cc 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -196,6 +196,9 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, __be32 lse; int err; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + stack = mpls_hdr(skb); lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); err = skb_mpls_update_lse(skb, lse); From 8f92330b08735e9a75ef1a8f6721c8ac51d03afb Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Dec 2020 10:37:52 +0100 Subject: [PATCH 32/40] net/sched: act_mpls: ensure LSE is pullable before reading it [ Upstream commit 9608fa653059c3f72faab0c148ac8773c46e7314 ] when 'act_mpls' is used to mangle the LSE, the current value is read from the packet dereferencing 4 bytes at mpls_hdr(): ensure that the label is contained in the skb "linear" area. Found by code inspection. v2: - use MPLS_HLEN instead of sizeof(new_lse), thanks to Jakub Kicinski Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Signed-off-by: Davide Caratti Acked-by: Guillaume Nault Link: https://lore.kernel.org/r/3243506cba43d14858f3bd21ee0994160e44d64a.1606987058.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/act_mpls.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 7954021ade33..0fccae356dc1 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -88,6 +88,9 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_MODIFY: + if (!pskb_may_pull(skb, + skb_network_offset(skb) + MPLS_HLEN)) + goto drop; new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false); if (skb_mpls_update_lse(skb, new_lse)) goto drop; From 2598dd80b80142f9d89c12f770d0c52ee2f68ce2 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 2 Dec 2020 20:39:46 -0800 Subject: [PATCH 33/40] net/mlx5: DR, Proper handling of unsupported Connect-X6DX SW steering [ Upstream commit d421e466c2373095f165ddd25cbabd6c5b077928 ] STEs format for Connect-X5 and Connect-X6DX different. Currently, on Connext-X6DX the SW steering would break at some point when building STEs w/o giving a proper error message. Fix this by checking the STE format of the current device when initializing domain: add mlx5_ifc definitions for Connect-X6DX SW steering, read FW capability to get the current format version, and check this version when domain is being created. Fixes: 26d688e33f88 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 1 + .../net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++++ .../net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 1 + include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 41662c4e2664..64f6f529f6eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -92,6 +92,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 5b24732b18c0..56bf900eb753 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -223,6 +223,11 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, if (ret) return ret; + if (dmn->info.caps.sw_format_ver != MLX5_STEERING_FORMAT_CONNECTX_5) { + mlx5dr_err(dmn, "SW steering is not supported on this device\n"); + return -EOPNOTSUPP; + } + ret = dr_domain_query_fdb_caps(mdev, dmn); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 31737dfca4ea..c360d08af67d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -613,6 +613,7 @@ struct mlx5dr_cmd_caps { u8 max_ft_level; u16 roce_min_src_udp; u8 num_esw_ports; + u8 sw_format_ver; bool eswitch_manager; bool rx_sw_owner; bool tx_sw_owner; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index aba56077cfda..75e5a7fe341f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1139,6 +1139,11 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +enum { + MLX5_STEERING_FORMAT_CONNECTX_5 = 0, + MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1419,7 +1424,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x20]; + u8 reserved_at_440[0x4]; + u8 steering_format_version[0x4]; + u8 create_qp_start_hint[0x18]; u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; From 8cd76dacd3dc3d8320b9e613e3440ac7cacddf3d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 2 Dec 2020 20:39:43 -0800 Subject: [PATCH 34/40] net/mlx5: Fix wrong address reclaim when command interface is down [ Upstream commit 1d2bb5ad89f47d8ce8aedc70ef85059ab3870292 ] When command interface is down, driver to reclaim all 4K page chucks that were hold by the Firmeware. Fix a bug for 64K page size systems, where driver repeatedly released only the first chunk of the page. Define helper function to fill 4K chunks for a given Firmware pages. Iterate over all unreleased Firmware pages and call the hepler per each. Fixes: 5adff6a08862 ("net/mlx5: Fix incorrect page count when in internal error") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../ethernet/mellanox/mlx5/core/pagealloc.c | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 91bd258ecf1b..db76c92b75e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -339,6 +339,24 @@ out_free: return err; } +static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index, + u32 npages) +{ + u32 pages_set = 0; + unsigned int n; + + for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) { + MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set, + fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE)); + pages_set++; + + if (!--npages) + break; + } + + return pages_set; +} + static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 *in, int in_size, u32 *out, int out_size) { @@ -362,8 +380,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, if (fwp->func_id != func_id) continue; - MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr); - i++; + i += fwp_fill_manage_pages_out(fwp, out, i, npages - i); } MLX5_SET(manage_pages_out, out, output_num_entries, i); From c38a7023c00a0ef30c92641147c990d3fdfb655b Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 27 Nov 2020 22:26:35 +0900 Subject: [PATCH 35/40] ALSA: usb-audio: US16x08: fix value count for level meters commit 402d5840b0d40a2a26c8651165d29b534abb6d36 upstream. The level meter control returns 34 integers of info. This fixes: snd-usb-audio 3-1:1.0: control 2:0:0:Level Meter:0: access overflow Fixes: d2bb390a2081 ("ALSA: usb-audio: Tascam US-16x08 DSP mixer quirk") Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20201127132635.18947-1-marcan@marcan.st Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index f0e8e1539450..c6c834ac83ac 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -607,7 +607,7 @@ static int snd_us16x08_eq_put(struct snd_kcontrol *kcontrol, static int snd_us16x08_meter_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { - uinfo->count = 1; + uinfo->count = 34; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->value.integer.max = 0x7FFF; uinfo->value.integer.min = 0; From dfe5d9a8307e319e8f767098e598f7ac6de944f5 Mon Sep 17 00:00:00 2001 From: Sanjay Govind Date: Mon, 30 Nov 2020 23:41:48 -0800 Subject: [PATCH 36/40] Input: xpad - support Ardwiino Controllers commit 2aab1561439032be2e98811dd0ddbeb5b2ae4c61 upstream. This commit adds support for Ardwiino Controllers Signed-off-by: Sanjay Govind Link: https://lore.kernel.org/r/20201201071922.131666-1-sanjay.govind9@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c77cdb3b62b5..8c73377ac82c 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -241,6 +241,7 @@ static const struct xpad_device { { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, + { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0303, "Mortal Kombat Klassic FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, @@ -418,6 +419,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ + XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */ From 6ad995b851cb91b1011f6b30555c150d88694a34 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 30 Nov 2020 22:39:40 -0800 Subject: [PATCH 37/40] Input: i8042 - add ByteSpeed touchpad to noloop table commit a48491c65b513e5cdc3e7a886a4db915f848a5f5 upstream. It looks like the C15B laptop got another vendor: ByteSpeed LLC. Avoid AUX loopback on this touchpad as well, thus input subsystem will be able to recognize a Synaptics touchpad in the AUX port. BugLink: https://bugs.launchpad.net/bugs/1906128 Signed-off-by: Po-Hsu Lin Link: https://lore.kernel.org/r/20201201054723.5939-1-po-hsu.lin@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 98f0c7729b75..837911a15e44 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -219,6 +219,10 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), }, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"), + DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"), + }, }, { } }; From 07434172c58b4b8aa8c3af5fad1ae32df4c6494f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 18 Nov 2020 15:05:20 +0300 Subject: [PATCH 38/40] tracing: Remove WARN_ON in start_thread() commit 310e3a4b5a4fc718a72201c1e4cf5c64ac6f5442 upstream. This patch reverts commit 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") .start hook can be legally called several times if according tracer is stopped screen window 1 [root@localhost ~]# echo 1 > /sys/kernel/tracing/events/kmem/kfree/enable [root@localhost ~]# echo 1 > /sys/kernel/tracing/options/pause-on-trace [root@localhost ~]# less -F /sys/kernel/tracing/trace screen window 2 [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo hwlat > /sys/kernel/debug/tracing/current_tracer [root@localhost ~]# echo 1 > /sys/kernel/debug/tracing/tracing_on [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo 2 > /sys/kernel/debug/tracing/tracing_on triggers warning in dmesg: WARNING: CPU: 3 PID: 1403 at kernel/trace/trace_hwlat.c:371 hwlat_tracer_start+0xc9/0xd0 Link: https://lkml.kernel.org/r/bd4d3e70-400d-9c82-7b73-a2d695e86b58@virtuozzo.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 35512ed26d9f..164e5c618cce 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -355,7 +355,7 @@ static int start_kthread(struct trace_array *tr) struct task_struct *kthread; int next_cpu; - if (WARN_ON(hwlat_kthread)) + if (hwlat_kthread) return 0; /* Just pick the first CPU on first iteration */ From 4460a7c979ee969c318ac9f5b764b0c2680f49e9 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 24 Nov 2020 18:56:16 -0600 Subject: [PATCH 39/40] RDMA/i40iw: Address an mmap handler exploit in i40iw commit 2ed381439e89fa6d1a0839ef45ccd45d99d8e915 upstream. i40iw_mmap manipulates the vma->vm_pgoff to differentiate a push page mmap vs a doorbell mmap, and uses it to compute the pfn in remap_pfn_range without any validation. This is vulnerable to an mmap exploit as described in: https://lore.kernel.org/r/20201119093523.7588-1-zhudi21@huawei.com The push feature is disabled in the driver currently and therefore no push mmaps are issued from user-space. The feature does not work as expected in the x722 product. Remove the push module parameter and all VMA attribute manipulations for this feature in i40iw_mmap. Update i40iw_mmap to only allow DB user mmapings at offset = 0. Check vm_pgoff for zero and if the mmaps are bound to a single page. Cc: Fixes: d37498417947 ("i40iw: add files for iwarp interface") Link: https://lore.kernel.org/r/20201125005616.1800-2-shiraz.saleem@intel.com Reported-by: Di Zhu Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/i40iw/i40iw_main.c | 5 ---- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 36 +++++------------------ 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 238614370927..f1b0290da92d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -54,10 +54,6 @@ #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD) -static int push_mode; -module_param(push_mode, int, 0644); -MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)"); - static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all"); @@ -1588,7 +1584,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, if (status) goto exit; iwdev->obj_next = iwdev->obj_mem; - iwdev->push_mode = push_mode; init_waitqueue_head(&iwdev->vchnl_waitq); init_waitqueue_head(&dev->vf_reqs); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 22bf4f09c064..7e9c1a40f040 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -168,38 +168,16 @@ static void i40iw_dealloc_ucontext(struct ib_ucontext *context) */ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { - struct i40iw_ucontext *ucontext; - u64 db_addr_offset; - u64 push_offset; + struct i40iw_ucontext *ucontext = to_ucontext(context); + u64 dbaddr; - ucontext = to_ucontext(context); - if (ucontext->iwdev->sc_dev.is_pf) { - db_addr_offset = I40IW_DB_ADDR_OFFSET; - push_offset = I40IW_PUSH_OFFSET; - if (vma->vm_pgoff) - vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1; - } else { - db_addr_offset = I40IW_VF_DB_ADDR_OFFSET; - push_offset = I40IW_VF_PUSH_OFFSET; - if (vma->vm_pgoff) - vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1; - } + if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; - vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT; + dbaddr = I40IW_DB_ADDR_OFFSET + pci_resource_start(ucontext->iwdev->ldev->pcidev, 0); - if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_private_data = ucontext; - } else { - if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - } - - if (io_remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT), - PAGE_SIZE, vma->vm_page_prot)) + if (io_remap_pfn_range(vma, vma->vm_start, dbaddr >> PAGE_SHIFT, PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot))) return -EAGAIN; return 0; From ec274ecd62f9e0404c935ff073346d243d5082e6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Dec 2020 10:40:28 +0100 Subject: [PATCH 40/40] Linux 5.4.82 Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20201206111554.677764505@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5bbb7607fa55..e520dee34490 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Kleptomaniac Octopus