From b58aa24edb621469040b0a8a47f25d91f5ee2c58 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 6 Dec 2017 09:27:30 -0800 Subject: [PATCH 001/705] dm bufio: fix shrinker scans when (nr_to_scan < retain_target) commit fbc7c07ec23c040179384a1f16b62b6030eb6bdd upstream. When system is under memory pressure it is observed that dm bufio shrinker often reclaims only one buffer per scan. This change fixes the following two issues in dm bufio shrinker that cause this behavior: 1. ((nr_to_scan - freed) <= retain_target) condition is used to terminate slab scan process. This assumes that nr_to_scan is equal to the LRU size, which might not be correct because do_shrink_slab() in vmscan.c calculates nr_to_scan using multiple inputs. As a result when nr_to_scan is less than retain_target (64) the scan will terminate after the first iteration, effectively reclaiming one buffer per scan and making scans very inefficient. This hurts vmscan performance especially because mutex is acquired/released every time dm_bufio_shrink_scan() is called. New implementation uses ((LRU size - freed) <= retain_target) condition for scan termination. LRU size can be safely determined inside __scan() because this function is called after dm_bufio_lock(). 2. do_shrink_slab() uses value returned by dm_bufio_shrink_count() to determine number of freeable objects in the slab. However dm_bufio always retains retain_target buffers in its LRU and will terminate a scan when this mark is reached. Therefore returning the entire LRU size from dm_bufio_shrink_count() is misleading because that does not represent the number of freeable objects that slab will reclaim during a scan. Returning (LRU size - retain_target) better represents the number of freeable objects in the slab. This way do_shrink_slab() returns 0 when (LRU size < retain_target) and vmscan will not try to scan this shrinker avoiding scans that will not reclaim any memory. Test: tested using Android device running /system/extras/alloc-stress that generates memory pressure and causes intensive shrinker scans Signed-off-by: Suren Baghdasaryan Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 7643f72adb1c..3ec647e8b9c6 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1554,7 +1554,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, int l; struct dm_buffer *b, *tmp; unsigned long freed = 0; - unsigned long count = nr_to_scan; + unsigned long count = c->n_buffers[LIST_CLEAN] + + c->n_buffers[LIST_DIRTY]; unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { @@ -1591,6 +1592,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c; unsigned long count; + unsigned long retain_target; c = container_of(shrink, struct dm_bufio_client, shrinker); if (sc->gfp_mask & __GFP_FS) @@ -1599,8 +1601,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) return 0; count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + retain_target = get_retain_buffers(c); dm_bufio_unlock(c); - return count; + return (count < retain_target) ? 0 : (count - retain_target); } /* From 234c8e60437d3733cdcae996a62064757f3f35ac Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Mon, 21 Nov 2016 16:58:40 +0200 Subject: [PATCH 002/705] mac80211: Add RX flag to indicate ICV stripped commit cef0acd4d7d4811d2d19cd0195031bf0dfe41249 upstream. Add a flag that indicates that the WEP ICV was stripped from an RX packet, allowing the device to not transfer that if it's already checked. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg Cc: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- include/net/mac80211.h | 5 ++++- net/mac80211/wep.c | 3 ++- net/mac80211/wpa.c | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c7d876e2a1a..8fd61bc50383 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1007,7 +1007,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware. * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame, * verification has been done by the hardware. - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame. * If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this @@ -1078,6 +1078,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. * This is used for AMSDU subframes which can have the same PN as * the first subframe. + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must + * be done in the hardware. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1113,6 +1115,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), RX_FLAG_MIC_STRIPPED = BIT_ULL(32), RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), + RX_FLAG_ICV_STRIPPED = BIT_ULL(34), }; #define RX_FLAG_STBC_SHIFT 26 diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index efa3f48f1ec5..73e8f347802e 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -293,7 +293,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) + if (!(status->flag & RX_FLAG_ICV_STRIPPED) && + pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 5c71d60f3a64..caa5986cb2e4 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -295,7 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); + if (!(status->flag & RX_FLAG_ICV_STRIPPED)) + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); From c5ab9ee144d8a3a3ec8de9b2c029e84ea221478f Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 27 Oct 2017 18:35:31 +0300 Subject: [PATCH 003/705] ath10k: rebuild crypto header in rx data frames commit 7eccb738fce57cbe53ed903ccf43f9ab257b15b3 upstream. Rx data frames notified through HTT_T2H_MSG_TYPE_RX_IND and HTT_T2H_MSG_TYPE_RX_FRAG_IND expect PN/TSC check to be done on host (mac80211) rather than firmware. Rebuild cipher header in every received data frames (that are notified through those HTT interfaces) from the rx_hdr_status tlv available in the rx descriptor of the first msdu. Skip setting RX_FLAG_IV_STRIPPED flag for the packets which requires mac80211 PN/TSC check support and set appropriate RX_FLAG for stripped crypto tail. Hw QCA988X, QCA9887, QCA99X0, QCA9984, QCA9888 and QCA4019 currently need the rebuilding of cipher header to perform PN/TSC check for replay attack. Please note that removing crypto tail for CCMP-256, GCMP and GCMP-256 ciphers in raw mode needs to be fixed. Since Rx with these ciphers in raw mode does not work in the current form even without this patch and removing crypto tail for these chipers needs clean up, raw mode related issues in CCMP-256, GCMP and GCMP-256 can be addressed in follow up patches. Tested-by: Manikanta Pubbisetty Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/htt_rx.c | 105 ++++++++++++++++++---- drivers/net/wireless/ath/ath10k/rx_desc.h | 3 + 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 0b4c1562420f..ba1fe61e6ea6 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -548,6 +548,11 @@ static int ath10k_htt_rx_crypto_param_len(struct ath10k *ar, return IEEE80211_TKIP_IV_LEN; case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2: return IEEE80211_CCMP_HDR_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2: + return IEEE80211_CCMP_256_HDR_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2: + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2: + return IEEE80211_GCMP_HDR_LEN; case HTT_RX_MPDU_ENCRYPT_WEP128: case HTT_RX_MPDU_ENCRYPT_WAPI: break; @@ -573,6 +578,11 @@ static int ath10k_htt_rx_crypto_tail_len(struct ath10k *ar, return IEEE80211_TKIP_ICV_LEN; case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2: return IEEE80211_CCMP_MIC_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2: + return IEEE80211_CCMP_256_MIC_LEN; + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2: + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2: + return IEEE80211_GCMP_MIC_LEN; case HTT_RX_MPDU_ENCRYPT_WEP128: case HTT_RX_MPDU_ENCRYPT_WAPI: break; @@ -1024,9 +1034,21 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, hdr = (void *)msdu->data; /* Tail */ - if (status->flag & RX_FLAG_IV_STRIPPED) + if (status->flag & RX_FLAG_IV_STRIPPED) { skb_trim(msdu, msdu->len - ath10k_htt_rx_crypto_tail_len(ar, enctype)); + } else { + /* MIC */ + if ((status->flag & RX_FLAG_MIC_STRIPPED) && + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + skb_trim(msdu, msdu->len - 8); + + /* ICV */ + if (status->flag & RX_FLAG_ICV_STRIPPED && + enctype != HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + skb_trim(msdu, msdu->len - + ath10k_htt_rx_crypto_tail_len(ar, enctype)); + } /* MMIC */ if ((status->flag & RX_FLAG_MMIC_STRIPPED) && @@ -1048,7 +1070,8 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, struct sk_buff *msdu, struct ieee80211_rx_status *status, - const u8 first_hdr[64]) + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) { struct ieee80211_hdr *hdr; struct htt_rx_desc *rxd; @@ -1056,6 +1079,7 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; int l3_pad_bytes; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [nwifi 802.11 header] <-- replaced with 802.11 hdr @@ -1084,6 +1108,14 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, /* push original 802.11 header */ hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); /* original 802.11 header has a different DA and in @@ -1144,6 +1176,7 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, u8 sa[ETH_ALEN]; int l3_pad_bytes; struct htt_rx_desc *rxd; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc @@ -1172,6 +1205,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, /* push original 802.11 header */ hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); /* original 802.11 header has a different DA and in @@ -1185,12 +1226,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, struct sk_buff *msdu, struct ieee80211_rx_status *status, - const u8 first_hdr[64]) + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) { struct ieee80211_hdr *hdr; size_t hdr_len; int l3_pad_bytes; struct htt_rx_desc *rxd; + int bytes_aligned = ar->hw_params.decap_align_bytes; /* Delivered decapped frame: * [amsdu header] <-- replaced with 802.11 hdr @@ -1206,6 +1249,14 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); + + if (!(status->flag & RX_FLAG_IV_STRIPPED)) { + memcpy(skb_push(msdu, + ath10k_htt_rx_crypto_param_len(ar, enctype)), + (void *)hdr + round_up(hdr_len, bytes_aligned), + ath10k_htt_rx_crypto_param_len(ar, enctype)); + } + memcpy(skb_push(msdu, hdr_len), hdr, hdr_len); } @@ -1240,13 +1291,15 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar, is_decrypted); break; case RX_MSDU_DECAP_NATIVE_WIFI: - ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr); + ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr, + enctype); break; case RX_MSDU_DECAP_ETHERNET2_DIX: ath10k_htt_rx_h_undecap_eth(ar, msdu, status, first_hdr, enctype); break; case RX_MSDU_DECAP_8023_SNAP_LLC: - ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr); + ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr, + enctype); break; } } @@ -1289,7 +1342,8 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu) static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, struct sk_buff_head *amsdu, - struct ieee80211_rx_status *status) + struct ieee80211_rx_status *status, + bool fill_crypt_header) { struct sk_buff *first; struct sk_buff *last; @@ -1299,7 +1353,6 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, enum htt_rx_mpdu_encrypt_type enctype; u8 first_hdr[64]; u8 *qos; - size_t hdr_len; bool has_fcs_err; bool has_crypto_err; bool has_tkip_err; @@ -1324,15 +1377,17 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, * decapped header. It'll be used for undecapping of each MSDU. */ hdr = (void *)rxd->rx_hdr_status; - hdr_len = ieee80211_hdrlen(hdr->frame_control); - memcpy(first_hdr, hdr, hdr_len); + memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN); /* Each A-MSDU subframe will use the original header as the base and be * reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl. */ hdr = (void *)first_hdr; - qos = ieee80211_get_qos_ctl(hdr); - qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + if (ieee80211_is_data_qos(hdr->frame_control)) { + qos = ieee80211_get_qos_ctl(hdr); + qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + } /* Some attention flags are valid only in the last MSDU. */ last = skb_peek_tail(amsdu); @@ -1379,9 +1434,14 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, status->flag |= RX_FLAG_DECRYPTED; if (likely(!is_mgmt)) - status->flag |= RX_FLAG_IV_STRIPPED | - RX_FLAG_MMIC_STRIPPED; -} + status->flag |= RX_FLAG_MMIC_STRIPPED; + + if (fill_crypt_header) + status->flag |= RX_FLAG_MIC_STRIPPED | + RX_FLAG_ICV_STRIPPED; + else + status->flag |= RX_FLAG_IV_STRIPPED; + } skb_queue_walk(amsdu, msdu) { ath10k_htt_rx_h_csum_offload(msdu); @@ -1397,6 +1457,9 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, if (is_mgmt) continue; + if (fill_crypt_header) + continue; + hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); } @@ -1407,6 +1470,9 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar, struct ieee80211_rx_status *status) { struct sk_buff *msdu; + struct sk_buff *first_subframe; + + first_subframe = skb_peek(amsdu); while ((msdu = __skb_dequeue(amsdu))) { /* Setup per-MSDU flags */ @@ -1415,6 +1481,13 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar, else status->flag |= RX_FLAG_AMSDU_MORE; + if (msdu == first_subframe) { + first_subframe = NULL; + status->flag &= ~RX_FLAG_ALLOW_SAME_PN; + } else { + status->flag |= RX_FLAG_ALLOW_SAME_PN; + } + ath10k_process_rx(ar, status, msdu); } } @@ -1557,7 +1630,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, rx_status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true); ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status); return num_msdus; @@ -1892,7 +1965,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); - ath10k_htt_rx_h_mpdu(ar, &amsdu, status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false); ath10k_htt_rx_h_deliver(ar, &amsdu, status); break; case -EAGAIN: diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h index 034e7a54c5b2..e4878d0044bf 100644 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h @@ -239,6 +239,9 @@ enum htt_rx_mpdu_encrypt_type { HTT_RX_MPDU_ENCRYPT_WAPI = 5, HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2 = 6, HTT_RX_MPDU_ENCRYPT_NONE = 7, + HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2 = 8, + HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2 = 9, + HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2 = 10, }; #define RX_MPDU_START_INFO0_PEER_IDX_MASK 0x000007ff From c781e3be97a1cbeef8c853101e8f266db556b0a3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 14 Dec 2017 17:40:50 -0800 Subject: [PATCH 004/705] KVM: Fix stack-out-of-bounds read in write_mmio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e39d200fa5bf5b94a0948db0dae44c1b73b84a56 upstream. Reported by syzkaller: BUG: KASAN: stack-out-of-bounds in write_mmio+0x11e/0x270 [kvm] Read of size 8 at addr ffff8803259df7f8 by task syz-executor/32298 CPU: 6 PID: 32298 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #18 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0xab/0xe1 print_address_description+0x6b/0x290 kasan_report+0x28a/0x370 write_mmio+0x11e/0x270 [kvm] emulator_read_write_onepage+0x311/0x600 [kvm] emulator_read_write+0xef/0x240 [kvm] emulator_fix_hypercall+0x105/0x150 [kvm] em_hypercall+0x2b/0x80 [kvm] x86_emulate_insn+0x2b1/0x1640 [kvm] x86_emulate_instruction+0x39a/0xb90 [kvm] handle_exception+0x1b4/0x4d0 [kvm_intel] vcpu_enter_guest+0x15a0/0x2640 [kvm] kvm_arch_vcpu_ioctl_run+0x549/0x7d0 [kvm] kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The path of patched vmmcall will patch 3 bytes opcode 0F 01 C1(vmcall) to the guest memory, however, write_mmio tracepoint always prints 8 bytes through *(u64 *)val since kvm splits the mmio access into 8 bytes. This leaks 5 bytes from the kernel stack (CVE-2017-17741). This patch fixes it by just accessing the bytes which we operate on. Before patch: syz-executor-5567 [007] .... 51370.561696: kvm_mmio: mmio write len 3 gpa 0x10 val 0x1ffff10077c1010f After patch: syz-executor-13416 [002] .... 51302.299573: kvm_mmio: mmio write len 3 gpa 0x10 val 0xc1010f Reported-by: Dmitry Vyukov Reviewed-by: Darren Kenny Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Cc: Mathieu Desnoyers Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmio.c | 6 +++--- arch/x86/kvm/x86.c | 8 ++++---- include/trace/events/kvm.h | 7 +++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index b6e715fd3c90..dac7ceb1a677 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - data); + &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), len); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); kvm_mmio_write_buf(data_buf, len, data); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); } else { trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, - fault_ipa, 0); + fault_ipa, NULL); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 73304b1a03cc..d3f80cccb9aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4264,7 +4264,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4517,7 +4517,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4539,14 +4539,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 8ade3eb6c640..90fce4d6956a 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq, { KVM_TRACE_MMIO_WRITE, "write" } TRACE_EVENT(kvm_mmio, - TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( @@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio, __entry->type = type; __entry->len = len; __entry->gpa = gpa; - __entry->val = val; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); ), TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", From 02f201f78fb9da5d140abc17cf9b3a196b1b42dd Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Wed, 13 Dec 2017 19:52:23 +0100 Subject: [PATCH 005/705] can: gs_usb: fix return value of the "set_bittiming" callback commit d5b42e6607661b198d8b26a0c30969605b1bf5c7 upstream. The "set_bittiming" callback treats a positive return value as error! For that reason "can_changelink()" will quit silently after setting the bittiming values without processing ctrlmode, restart-ms, etc. Signed-off-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index eea9aea14b00..5d5012337d9e 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) From 30191718645d24c5b213f1ab3ae531a585a13f21 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Jan 2018 13:39:15 -0800 Subject: [PATCH 006/705] IB/srpt: Disable RDMA access by the initiator commit bec40c26041de61162f7be9d2ce548c756ce0f65 upstream. With the SRP protocol all RDMA operations are initiated by the target. Since no RDMA operations are initiated by the initiator, do not grant the initiator permission to submit RDMA reads or writes to the target. Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index b9748970df4a..29ab814693fc 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -992,8 +992,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) return -ENOMEM; attr->qp_state = IB_QPS_INIT; - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; attr->port_num = ch->sport->port; attr->pkey_index = 0; From 14e1c579acba423e363d1494c3183f943b8a1e3d Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 27 Nov 2017 09:33:03 +0000 Subject: [PATCH 007/705] MIPS: Validate PR_SET_FP_MODE prctl(2) requests against the ABI of the task commit b67336eee3fcb8ecedc6c13e2bf88aacfa3151e2 upstream. Fix an API loophole introduced with commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS"), where the caller of prctl(2) is incorrectly allowed to make a change to CP0.Status.FR or CP0.Config5.FRE register bits even if CONFIG_MIPS_O32_FP64_SUPPORT has not been enabled, despite that an executable requesting the mode requested via ELF file annotation would not be allowed to run in the first place, or for n64 and n64 ABI tasks which do not have non-default modes defined at all. Add suitable checks to `mips_set_process_fp_mode' and bail out if an invalid mode change has been requested for the ABI in effect, even if the FPU hardware or emulation would otherwise allow it. Always succeed however without taking any further action if the mode requested is the same as one already in effect, regardless of whether any mode change, should it be requested, would actually be allowed for the task concerned. Signed-off-by: Maciej W. Rozycki Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17800/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c558bce989cd..6e716a5f1173 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -683,6 +683,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) struct task_struct *t; int max_users; + /* If nothing to change, return right away, successfully. */ + if (value == mips_get_process_fp_mode(task)) + return 0; + + /* Only accept a mode change if 64-bit FP enabled for o32. */ + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) + return -EOPNOTSUPP; + + /* And only for o32 tasks. */ + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS)) + return -EOPNOTSUPP; + /* Check the value is valid */ if (value & ~known_bits) return -EOPNOTSUPP; From 8eb5655aacdd9fee39a690e29252a89326c85df6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:51:35 +0000 Subject: [PATCH 008/705] MIPS: Factor out NT_PRFPREG regset access helpers commit a03fe72572c12e98f4173f8a535f32468e48b6ec upstream. In preparation to fix a commit 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") FCSR access regression factor out NT_PRFPREG regset access helpers for the non-MSA and the MSA variants respectively, to avoid having to deal with excessive indentation in the actual fix. No functional change, however use `target->thread.fpu.fpr[0]' rather than `target->thread.fpu.fpr[i]' for FGR holding type size determination as there's no `i' variable to refer to anymore, and for the factored out `i' variable declaration use `unsigned int' rather than `unsigned' as its type, following the common style. Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17925/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 108 +++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 25 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 11890e6e4093..c6df58e01418 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -439,25 +439,36 @@ static int gpr64_set(struct task_struct *target, #endif /* CONFIG_64BIT */ -static int fpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots + * correspond 1:1 to buffer slots. + */ +static int fpr_get_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) { - unsigned i; - int err; + return user_regset_copyout(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, sizeof(elf_fpregset_t)); +} + +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's + * general register slots are copied to buffer slots. + */ +static int fpr_get_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + unsigned int i; u64 fpr_val; - - /* XXX fcr31 */ - - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + int err; for (i = 0; i < NUM_FPU_REGS; i++) { fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + err = user_regset_copyout(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); if (err) @@ -467,27 +478,54 @@ static int fpr_get(struct task_struct *target, return 0; } -static int fpr_set(struct task_struct *target, +/* Copy the floating-point context to the supplied NT_PRFPREG buffer. */ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + void *kbuf, void __user *ubuf) { - unsigned i; int err; - u64 fpr_val; /* XXX fcr31 */ - init_fp_ctx(target); + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + return err; +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP + * context's general register slots. + */ +static int fpr_set_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, sizeof(elf_fpregset_t)); +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 + * bits only of FP context's general register slots. + */ +static int fpr_set_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { - err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + for (i = 0; i < NUM_FPU_REGS && *count >= sizeof(elf_fpreg_t); i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); if (err) @@ -498,6 +536,26 @@ static int fpr_set(struct task_struct *target, return 0; } +/* Copy the supplied NT_PRFPREG buffer to the floating-point context. */ +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err; + + /* XXX fcr31 */ + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + + return err; +} + enum mips_regset { REGSET_GPR, REGSET_FPR, From 5b593a81fddd566aaf5f7b5f4d96ccb84b0c6148 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:52:15 +0000 Subject: [PATCH 009/705] MIPS: Guard against any partial write attempt with PTRACE_SETREGSET commit dc24d0edf33c3e15099688b6bbdf7bdc24bf6e91 upstream. Complement commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") and ensure that no partial register write attempt is made with PTRACE_SETREGSET, as we do not preinitialize any temporaries used to hold incoming register data and consequently random data could be written. It is the responsibility of the caller, such as `ptrace_regset', to arrange for writes to span whole registers only, so here we only assert that it has indeed happened. Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17926/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c6df58e01418..16a7809c73be 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -536,7 +536,15 @@ static int fpr_set_msa(struct task_struct *target, return 0; } -/* Copy the supplied NT_PRFPREG buffer to the floating-point context. */ +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', + * which is supposed to have been guaranteed by the kernel before + * calling us, e.g. in `ptrace_regset'. We enforce that requirement, + * so that we can safely avoid preinitializing temporaries for + * partial register writes. + */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -544,6 +552,8 @@ static int fpr_set(struct task_struct *target, { int err; + BUG_ON(count % sizeof(elf_fpreg_t)); + /* XXX fcr31 */ init_fp_ctx(target); From f616180a8720b499ada54c830b7e028ed8a8e1c7 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:53:14 +0000 Subject: [PATCH 010/705] MIPS: Consistently handle buffer counter with PTRACE_SETREGSET commit 80b3ffce0196ea50068885d085ff981e4b8396f4 upstream. Update commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") bug and consistently consume all data supplied to `fpr_set_msa' with the ptrace(2) PTRACE_SETREGSET request, such that a zero data buffer counter is returned where insufficient data has been given to fill a whole number of FP general registers. In reality this is not going to happen, as the caller is supposed to only supply data covering a whole number of registers and it is verified in `ptrace_regset' and again asserted in `fpr_set', however structuring code such that the presence of trailing partial FP general register data causes `fpr_set_msa' to return with a non-zero data buffer counter makes it appear that this trailing data will be used if there are subsequent writes made to FP registers, which is going to be the case with the FCSR once the missing write to that register has been fixed. Fixes: d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17927/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 16a7809c73be..73ce5e5a989c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -524,7 +524,7 @@ static int fpr_set_msa(struct task_struct *target, int err; BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && *count >= sizeof(elf_fpreg_t); i++) { + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { err = user_regset_copyin(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); From cfc5c63a38ca0ae5f11d48589c4f9e348e9428aa Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:54:33 +0000 Subject: [PATCH 011/705] MIPS: Fix an FCSR access API regression with NT_PRFPREG and MSA commit be07a6a1188372b6d19a3307ec33211fc9c9439d upstream. Fix a commit 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") public API regression, then activated by commit 1db1af84d6df ("MIPS: Basic MSA context switching support"), that caused the FCSR register not to be read or written for CONFIG_CPU_HAS_MSA kernel configurations (regardless of actual presence or absence of the MSA feature in a given processor) with ptrace(2) PTRACE_GETREGSET and PTRACE_SETREGSET requests nor recorded in core dumps. This is because with !CONFIG_CPU_HAS_MSA configurations the whole of `elf_fpregset_t' array is bulk-copied as it is, which includes the FCSR in one half of the last, 33rd slot, whereas with CONFIG_CPU_HAS_MSA configurations array elements are copied individually, and then only the leading 32 FGR slots while the remaining slot is ignored. Correct the code then such that only FGR slots are copied in the respective !MSA and MSA helpers an then the FCSR slot is handled separately in common code. Use `ptrace_setfcr31' to update the FCSR too, so that the read-only mask is respected. Retrieving a correct value of FCSR is important in debugging not only for the human to be able to get the right interpretation of the situation, but for correct operation of GDB as well. This is because the condition code bits in FSCR are used by GDB to determine the location to place a breakpoint at when single-stepping through an FPU branch instruction. If such a breakpoint is placed incorrectly (i.e. with the condition reversed), then it will be missed, likely causing the debuggee to run away from the control of GDB and consequently breaking the process of investigation. Fortunately GDB continues using the older PTRACE_GETFPREGS ptrace(2) request which is unaffected, so the regression only really hits with post-mortem debug sessions using a core dump file, in which case execution, and consequently single-stepping through branches is not possible. Of course core files created by buggy kernels out there will have the value of FCSR recorded clobbered, but such core files cannot be corrected and the person using them simply will have to be aware that the value of FCSR retrieved is not reliable. Which also means we can likely get away without defining a replacement API which would ensure a correct value of FSCR to be retrieved, or none at all. This is based on previous work by Alex Smith, extensively rewritten. Signed-off-by: Alex Smith Signed-off-by: James Hogan Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: Paul Burton Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17928/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 47 ++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 73ce5e5a989c..800342a16b12 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -442,7 +442,7 @@ static int gpr64_set(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer, * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots - * correspond 1:1 to buffer slots. + * correspond 1:1 to buffer slots. Only general registers are copied. */ static int fpr_get_fpa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -450,13 +450,14 @@ static int fpr_get_fpa(struct task_struct *target, { return user_regset_copyout(pos, count, kbuf, ubuf, &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* * Copy the floating-point context to the supplied NT_PRFPREG buffer, * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's - * general register slots are copied to buffer slots. + * general register slots are copied to buffer slots. Only general + * registers are copied. */ static int fpr_get_msa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -478,20 +479,29 @@ static int fpr_get_msa(struct task_struct *target, return 0; } -/* Copy the floating-point context to the supplied NT_PRFPREG buffer. */ +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); int err; - /* XXX fcr31 */ - if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); else err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); return err; } @@ -499,7 +509,7 @@ static int fpr_get(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context, * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP - * context's general register slots. + * context's general register slots. Only general registers are copied. */ static int fpr_set_fpa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -507,13 +517,14 @@ static int fpr_set_fpa(struct task_struct *target, { return user_regset_copyin(pos, count, kbuf, ubuf, &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* * Copy the supplied NT_PRFPREG buffer to the floating-point context, * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 - * bits only of FP context's general register slots. + * bits only of FP context's general register slots. Only general + * registers are copied. */ static int fpr_set_msa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -538,6 +549,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -550,18 +563,30 @@ static int fpr_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + u32 fcr31; int err; BUG_ON(count % sizeof(elf_fpreg_t)); - /* XXX fcr31 */ - init_fp_ctx(target); if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); else err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + if (count > 0) { + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + ptrace_setfcr31(target, fcr31); + } return err; } From 1f4cff1c364ba4d1b197af88fca63b8736a9d601 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:55:40 +0000 Subject: [PATCH 012/705] MIPS: Also verify sizeof `elf_fpreg_t' with PTRACE_SETREGSET commit 006501e039eec411842bb3150c41358867d320c2 upstream. Complement commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") and like with the PTRACE_GETREGSET ptrace(2) request also apply a BUILD_BUG_ON check for the size of the `elf_fpreg_t' type in the PTRACE_SETREGSET request handler. Signed-off-by: Maciej W. Rozycki Fixes: d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17929/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 800342a16b12..99bc1ce2f40a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -467,6 +467,7 @@ static int fpr_get_msa(struct task_struct *target, u64 fpr_val; int err; + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); for (i = 0; i < NUM_FPU_REGS; i++) { fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); err = user_regset_copyout(pos, count, kbuf, ubuf, From 78c00f597ba8211007420afdac5f99e56f9c0e2f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:56:54 +0000 Subject: [PATCH 013/705] MIPS: Disallow outsized PTRACE_SETREGSET NT_PRFPREG regset accesses commit c8c5a3a24d395b14447a9a89d61586a913840a3b upstream. Complement commit c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump register layout") and also reject outsized PTRACE_SETREGSET requests to the NT_PRFPREG regset, like with the NT_PRSTATUS regset. Signed-off-by: Maciej W. Rozycki Fixes: c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump register layout") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17930/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 99bc1ce2f40a..0c8ae2cc6380 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -570,6 +570,9 @@ static int fpr_set(struct task_struct *target, BUG_ON(count % sizeof(elf_fpreg_t)); + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; + init_fp_ctx(target); if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) From 491c0ca3dbd5f6f9a1c3aa67b8c3b3c9610c489e Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 3 Jan 2018 14:31:38 -0800 Subject: [PATCH 014/705] kvm: vmx: Scrub hardware GPRs at VM-exit commit 0cb5b30698fdc8f6b4646012e3acb4ddce430788 upstream. Guest GPR values are live in the hardware GPRs at VM-exit. Do not leave any guest values in hardware GPRs after the guest GPR values are saved to the vcpu_vmx structure. This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753. Specifically, it defeats the Project Zero PoC for CVE 2017-5715. Suggested-by: Eric Northup Signed-off-by: Jim Mattson Reviewed-by: Eric Northup Reviewed-by: Benjamin Serebrin Reviewed-by: Andrew Honig [Paolo: Add AMD bits, Signed-off-by: Tom Lendacky ] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 19 +++++++++++++++++++ arch/x86/kvm/vmx.c | 14 +++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8148d8ca7930..8d96f9ce1926 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4868,6 +4868,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" +#endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. + */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" #endif "pop %%" _ASM_BP : diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 263e56059fd5..d8582902a783 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8948,6 +8948,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -8964,12 +8965,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" From 64ab063b7193dd8f41f54751d9612c5b00735395 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2018 12:49:29 +0100 Subject: [PATCH 015/705] platform/x86: wmi: Call acpi_wmi_init() later commit 98b8e4e5c17bf87c1b18ed929472051dab39878c upstream. Calling acpi_wmi_init() at the subsys_initcall() level causes ordering issues to appear on some systems and they are difficult to reproduce, because there is no guaranteed ordering between subsys_initcall() calls, so they may occur in different orders on different systems. In particular, commit 86d9f48534e8 (mm/slab: fix kmemcg cache creation delayed issue) exposed one of these issues where genl_init() and acpi_wmi_init() are both called at the same initcall level, but the former must run before the latter so as to avoid a NULL pointer dereference. For this reason, move the acpi_wmi_init() invocation to the initcall_sync level which should still be early enough for things to work correctly in the WMI land. Link: https://marc.info/?t=151274596700002&r=1&w=2 Reported-by: Jonathan McDowell Reported-by: Joonsoo Kim Tested-by: Jonathan McDowell Signed-off-by: Rafael J. Wysocki Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index ceeb8c188ef3..00d82e8443bd 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -848,5 +848,5 @@ static void __exit acpi_wmi_exit(void) pr_info("Mapper unloaded\n"); } -subsys_initcall(acpi_wmi_init); +subsys_initcall_sync(acpi_wmi_init); module_exit(acpi_wmi_exit); From 0199927a8e51126ff420efc2155ad196a853678b Mon Sep 17 00:00:00 2001 From: Vikas C Sajjan Date: Thu, 16 Nov 2017 21:43:44 +0530 Subject: [PATCH 016/705] x86/acpi: Handle SCI interrupts above legacy space gracefully commit 252714155f04c5d16989cb3aadb85fd1b5772f99 upstream. Platforms which support only IOAPIC mode, pass the SCI information above the legacy space (0-15) via the FADT mechanism and not via MADT. In such cases mp_override_legacy_irq() which is invoked from acpi_sci_ioapic_setup() to register SCI interrupts fails for interrupts greater equal 16, since it is meant to handle only the legacy space and emits error "Invalid bus_irq %u for legacy override". Add a new function to handle SCI interrupts >= 16 and invoke it conditionally in acpi_sci_ioapic_setup(). The code duplication due to this new function will be cleaned up in a separate patch. Co-developed-by: Sunil V L Signed-off-by: Vikas C Sajjan Signed-off-by: Sunil V L Signed-off-by: Thomas Gleixner Tested-by: Abdul Lateef Attar Acked-by: Rafael J. Wysocki Cc: linux-pm@vger.kernel.org Cc: kkamagui@gmail.com Cc: linux-acpi@vger.kernel.org Link: https://lkml.kernel.org/r/1510848825-21965-2-git-send-email-vikas.cha.sajjan@hpe.com Cc: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 11cc600f4df0..928259488bc3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -422,6 +422,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi) +{ + struct mpc_intsrc mp_irq; + int ioapic, pin; + + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + pr_warn("Failed to find ioapic for gsi : %u\n", gsi); + return ioapic; + } + + pin = mp_find_ioapic_pin(ioapic, gsi); + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = pin; + + mp_save_irq(&mp_irq); + + return 0; +} + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -466,7 +494,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + if (bus_irq < NR_IRQS_LEGACY) + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + else + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* From 83da0245eda22dd99a44f2f127cca86d06479edd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 1 Jan 2018 09:50:50 +0100 Subject: [PATCH 017/705] ALSA: pcm: Remove incorrect snd_BUG_ON() usages commit fe08f34d066f4404934a509b6806db1a4f700c86 upstream. syzkaller triggered kernel warnings through PCM OSS emulation at closing a stream: WARNING: CPU: 0 PID: 3502 at sound/core/pcm_lib.c:1635 snd_pcm_hw_param_first+0x289/0x690 sound/core/pcm_lib.c:1635 Call Trace: .... snd_pcm_hw_param_near.constprop.27+0x78d/0x9a0 sound/core/oss/pcm_oss.c:457 snd_pcm_oss_change_params+0x17d3/0x3720 sound/core/oss/pcm_oss.c:969 snd_pcm_oss_make_ready+0xaa/0x130 sound/core/oss/pcm_oss.c:1128 snd_pcm_oss_sync+0x257/0x830 sound/core/oss/pcm_oss.c:1638 snd_pcm_oss_release+0x20b/0x280 sound/core/oss/pcm_oss.c:2431 __fput+0x327/0x7e0 fs/file_table.c:210 .... This happens while it tries to open and set up the aloop device concurrently. The warning above (invoked from snd_BUG_ON() macro) is to detect the unexpected logical error where snd_pcm_hw_refine() call shouldn't fail. The theory is true for the case where the hw_params config rules are static. But for an aloop device, the hw_params rule condition does vary dynamically depending on the connected target; when another device is opened and changes the parameters, the device connected in another side is also affected, and it caused the error from snd_pcm_hw_refine(). That is, the simplest "solution" for this is to remove the incorrect assumption of static rules, and treat such an error as a normal error path. As there are a couple of other places using snd_BUG_ON() incorrectly, this patch removes these spurious snd_BUG_ON() calls. Reported-by: syzbot+6f11c7e2a1b91d466432@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 1 - sound/core/pcm_lib.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index ebc9fdfe64df..2167363c18b3 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -466,7 +466,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, v = snd_pcm_hw_param_last(pcm, params, var, dir); else v = snd_pcm_hw_param_first(pcm, params, var, dir); - snd_BUG_ON(v < 0); return v; } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index c80d80e312e3..e685e779a4b8 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1664,7 +1664,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); @@ -1711,7 +1711,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); From 8e81425e80c90667464540694967337e318e545b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jan 2018 16:39:27 +0100 Subject: [PATCH 018/705] ALSA: pcm: Add missing error checks in OSS emulation plugin builder commit 6708913750344a900f2e73bfe4a4d6dbbce4fe8d upstream. In the OSS emulation plugin builder where the frame size is parsed in the plugin chain, some places miss the possible errors returned from the plugin src_ or dst_frames callback. This patch papers over such places. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_plugin.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 727ac44d39f4..a84a1d3d23e5 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st snd_pcm_sframes_t frames = size; plugin = snd_pcm_plug_first(plug); - while (plugin && frames > 0) { + while (plugin) { + if (frames <= 0) + return frames; if ((next = plugin->next) != NULL) { snd_pcm_sframes_t frames1 = frames; - if (plugin->dst_frames) + if (plugin->dst_frames) { frames1 = plugin->dst_frames(plugin, frames); + if (frames1 <= 0) + return frames1; + } if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) { return err; } if (err != frames1) { frames = err; - if (plugin->src_frames) + if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames1); + if (frames <= 0) + return frames; + } } } else dst_channels = NULL; From 3a00564cb49f17e5ca2a1d6e34c7ea7b1113f49e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Jan 2018 13:58:31 +0100 Subject: [PATCH 019/705] ALSA: pcm: Abort properly at pending signal in OSS read/write loops commit 29159a4ed7044c52e3e2cf1a9fb55cec4745c60b upstream. The loops for read and write in PCM OSS emulation have no proper check of pending signals, and they keep processing even after user tries to break. This results in a very long delay, often seen as RCU stall when a huge unprocessed bytes remain queued. The bug could be easily triggered by syzkaller. As a simple workaround, this patch adds the proper check of pending signals and aborts the loop appropriately. Reported-by: syzbot+993cb4cfcbbff3947c21@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 2167363c18b3..f82f4455cd77 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1416,6 +1416,10 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha tmp != runtime->oss.period_bytes) break; } + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + goto err; + } } mutex_unlock(&runtime->oss.params_lock); return xfer; @@ -1501,6 +1505,10 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + goto err; + } } mutex_unlock(&runtime->oss.params_lock); return xfer; From bee3f2d5c02aed452542fae2bb980f555dd887ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Jan 2018 14:03:53 +0100 Subject: [PATCH 020/705] ALSA: pcm: Allow aborting mutex lock at OSS read/write loops commit 900498a34a3ac9c611e9b425094c8106bdd7dc1c upstream. PCM OSS read/write loops keep taking the mutex lock for the whole read/write, and this might take very long when the exceptionally high amount of data is given. Also, since it invokes with mutex_lock(), the concurrent read/write becomes unbreakable. This patch tries to address these issues by replacing mutex_lock() with mutex_lock_interruptible(), and also splits / re-takes the lock at each read/write period chunk, so that it can switch the context more finely if requested. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index f82f4455cd77..3321348fd86b 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1369,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) @@ -1414,18 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha xfer += tmp; if ((substream->f_flags & O_NONBLOCK) != 0 && tmp != runtime->oss.period_bytes) - break; + tmp = -EAGAIN; } + err: + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; if (signal_pending(current)) { tmp = -ERESTARTSYS; - goto err; + break; } + tmp = 0; } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - - err: - mutex_unlock(&runtime->oss.params_lock); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1473,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); @@ -1505,16 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } + err: + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; if (signal_pending(current)) { tmp = -ERESTARTSYS; - goto err; + break; } + tmp = 0; } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - - err: - mutex_unlock(&runtime->oss.params_lock); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } From 01046dd834ac5ecf3c57dc34b60de127e7ac2d29 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jan 2018 16:09:47 +0100 Subject: [PATCH 021/705] ALSA: aloop: Release cable upon open error path commit 9685347aa0a5c2869058ca6ab79fd8e93084a67f upstream. The aloop runtime object and its assignment in the cable are left even when opening a substream fails. This doesn't mean any memory leak, but it still keeps the invalid pointer that may be referred by the another side of the cable spontaneously, which is a potential Oops cause. Clean up the cable assignment and the empty cable upon the error path properly. Fixes: 597603d615d2 ("ALSA: introduce the snd-aloop module for the PCM loopback") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 54f348a4fb78..2adc88d6d507 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -658,12 +658,31 @@ static int rule_channels(struct snd_pcm_hw_params *params, return snd_interval_refine(hw_param_interval(params, rule->var), &t); } +static void free_cable(struct snd_pcm_substream *substream) +{ + struct loopback *loopback = substream->private_data; + int dev = get_cable_index(substream); + struct loopback_cable *cable; + + cable = loopback->cables[substream->number][dev]; + if (!cable) + return; + if (cable->streams[!substream->stream]) { + /* other stream is still alive */ + cable->streams[substream->stream] = NULL; + } else { + /* free the cable */ + loopback->cables[substream->number][dev] = NULL; + kfree(cable); + } +} + static int loopback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm; - struct loopback_cable *cable; + struct loopback_cable *cable = NULL; int err = 0; int dev = get_cable_index(substream); @@ -682,7 +701,6 @@ static int loopback_open(struct snd_pcm_substream *substream) if (!cable) { cable = kzalloc(sizeof(*cable), GFP_KERNEL); if (!cable) { - kfree(dpcm); err = -ENOMEM; goto unlock; } @@ -724,6 +742,10 @@ static int loopback_open(struct snd_pcm_substream *substream) else runtime->hw = cable->hw; unlock: + if (err < 0) { + free_cable(substream); + kfree(dpcm); + } mutex_unlock(&loopback->cable_lock); return err; } @@ -732,20 +754,10 @@ static int loopback_close(struct snd_pcm_substream *substream) { struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; - struct loopback_cable *cable; - int dev = get_cable_index(substream); loopback_timer_stop(dpcm); mutex_lock(&loopback->cable_lock); - cable = loopback->cables[substream->number][dev]; - if (cable->streams[!substream->stream]) { - /* other stream is still alive */ - cable->streams[substream->stream] = NULL; - } else { - /* free the cable */ - loopback->cables[substream->number][dev] = NULL; - kfree(cable); - } + free_cable(substream); mutex_unlock(&loopback->cable_lock); return 0; } From 5af666d0ddb7dcb28acc822816fa11d8c55fe860 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jan 2018 16:15:33 +0100 Subject: [PATCH 022/705] ALSA: aloop: Fix inconsistent format due to incomplete rule commit b088b53e20c7d09b5ab84c5688e609f478e5c417 upstream. The extra hw constraint rule for the formats the aloop driver introduced has a slight flaw, where it doesn't return a positive value when the mask got changed. It came from the fact that it's basically a copy&paste from snd_hw_constraint_mask64(). The original code is supposed to be a single-shot and it modifies the mask bits only once and never after, while what we need for aloop is the dynamic hw rule that limits the mask bits. This difference results in the inconsistent state, as the hw_refine doesn't apply the dependencies fully. The worse and surprisingly result is that it causes a crash in OSS emulation when multiple full-duplex reads/writes are performed concurrently (I leave why it triggers Oops to readers as a homework). For fixing this, replace a few open-codes with the standard snd_mask_*() macros. Reported-by: syzbot+3902b5220e8ca27889ca@syzkaller.appspotmail.com Fixes: b1c73fc8e697 ("ALSA: snd-aloop: Fix hw_params restrictions and checking") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 2adc88d6d507..59e4a88757b1 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -622,14 +623,12 @@ static int rule_format(struct snd_pcm_hw_params *params, { struct snd_pcm_hardware *hw = rule->private; - struct snd_mask *maskp = hw_param_mask(params, rule->var); + struct snd_mask m; - maskp->bits[0] &= (u_int32_t)hw->formats; - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32); - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */ - if (! maskp->bits[0] && ! maskp->bits[1]) - return -EINVAL; - return 0; + snd_mask_none(&m); + m.bits[0] = (u_int32_t)hw->formats; + m.bits[1] = (u_int32_t)(hw->formats >> 32); + return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, From 43ff00f873773afc676d362e3c1d941f16d569af Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jan 2018 17:38:54 +0100 Subject: [PATCH 023/705] ALSA: aloop: Fix racy hw constraints adjustment commit 898dfe4687f460ba337a01c11549f87269a13fa2 upstream. The aloop driver tries to update the hw constraints of the connected target on the cable of the opened PCM substream. This is done by adding the extra hw constraints rules referring to the substream runtime->hw fields, while the other substream may update the runtime hw of another side on the fly. This is, however, racy and may result in the inconsistent values when both PCM streams perform the prepare concurrently. One of the reason is that it overwrites the other's runtime->hw field; which is not only racy but also broken when it's called before the open of another side finishes. And, since the reference to runtime->hw isn't protected, the concurrent write may give the partial value update and become inconsistent. This patch is an attempt to fix and clean up: - The prepare doesn't change the runtime->hw of other side any longer, but only update the cable->hw that is referred commonly. - The extra rules refer to the loopback_pcm object instead of the runtime->hw. The actual hw is deduced from cable->hw. - The extra rules take the cable_lock to protect against the race. Fixes: b1c73fc8e697 ("ALSA: snd-aloop: Fix hw_params restrictions and checking") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 51 ++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 59e4a88757b1..cbd20cb8ca11 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -306,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd) return 0; } -static void params_change_substream(struct loopback_pcm *dpcm, - struct snd_pcm_runtime *runtime) -{ - struct snd_pcm_runtime *dst_runtime; - - if (dpcm == NULL || dpcm->substream == NULL) - return; - dst_runtime = dpcm->substream->runtime; - if (dst_runtime == NULL) - return; - dst_runtime->hw = dpcm->cable->hw; -} - static void params_change(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -330,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream) cable->hw.rate_max = runtime->rate; cable->hw.channels_min = runtime->channels; cable->hw.channels_max = runtime->channels; - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK], - runtime); - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE], - runtime); } static int loopback_prepare(struct snd_pcm_substream *substream) @@ -621,24 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream) static int rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_mask m; snd_mask_none(&m); - m.bits[0] = (u_int32_t)hw->formats; - m.bits[1] = (u_int32_t)(hw->formats >> 32); + mutex_lock(&dpcm->loopback->cable_lock); + m.bits[0] = (u_int32_t)cable->hw.formats; + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32); + mutex_unlock(&dpcm->loopback->cable_lock); return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->rate_min; - t.max = hw->rate_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.rate_min; + t.max = cable->hw.rate_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -647,11 +635,14 @@ static int rule_rate(struct snd_pcm_hw_params *params, static int rule_channels(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->channels_min; - t.max = hw->channels_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.channels_min; + t.max = cable->hw.channels_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -717,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream) /* are cached -> they do not reflect the actual state */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, - rule_format, &runtime->hw, + rule_format, dpcm, SNDRV_PCM_HW_PARAM_FORMAT, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - rule_rate, &runtime->hw, + rule_rate, dpcm, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, - rule_channels, &runtime->hw, + rule_channels, dpcm, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) goto unlock; From 5c1b80f674e96708ca9bc7169ca07399a1a2e247 Mon Sep 17 00:00:00 2001 From: Vikas C Sajjan Date: Thu, 16 Nov 2017 21:43:45 +0530 Subject: [PATCH 024/705] x86/acpi: Reduce code duplication in mp_override_legacy_irq() commit 4ee2ec1b122599f7b10c849fa7915cebb37b7edb upstream. The new function mp_register_ioapic_irq() is a subset of the code in mp_override_legacy_irq(). Replace the code duplication by invoking mp_register_ioapic_irq() from mp_override_legacy_irq(). Signed-off-by: Vikas C Sajjan Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Cc: linux-pm@vger.kernel.org Cc: kkamagui@gmail.com Cc: linux-acpi@vger.kernel.org Link: https://lkml.kernel.org/r/1510848825-21965-3-git-send-email-vikas.cha.sajjan@hpe.com Cc: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 928259488bc3..0a1e8a67cc99 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -335,13 +335,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi); + static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - /* * Check bus_irq boundary. */ @@ -350,14 +349,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, return; } - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - /* * TBD: This check is for faulty timer entries, where the override * erroneously sets the trigger to level, resulting in a HUGE @@ -366,16 +357,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0) + return; /* * Reset default identity mapping if gsi is also an legacy IRQ, * otherwise there will be more than one entry with the same GSI From 1ecdfc1ee99d296a0bd69836a343f6e601190a75 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Mon, 27 Feb 2017 14:26:53 -0800 Subject: [PATCH 025/705] zswap: don't param_set_charp while holding spinlock commit fd5bb66cd934987e49557455b6497fc006521940 upstream. Change the zpool/compressor param callback function to release the zswap_pools_lock spinlock before calling param_set_charp, since that function may sleep when it calls kmalloc with GFP_KERNEL. While this problem has existed for a while, I wasn't able to trigger it using a tight loop changing either/both the zpool and compressor params; I think it's very unlikely to be an issue on the stable kernels, especially since most zswap users will change the compressor and/or zpool from sysfs only one time each boot - or zero times, if they add the params to the kernel boot. Fixes: c99b42c3529e ("zswap: use charp for zswap param strings") Link: http://lkml.kernel.org/r/20170126155821.4545-1-ddstreet@ieee.org Signed-off-by: Dan Streetman Reported-by: Sergey Senozhatsky Cc: Michal Hocko Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index dbef27822a98..ded051e3433d 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -752,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, pool = zswap_pool_find_get(type, compressor); if (pool) { zswap_pool_debug("using existing", pool); + WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); - } else { - spin_unlock(&zswap_pools_lock); - pool = zswap_pool_create(type, compressor); - spin_lock(&zswap_pools_lock); } + spin_unlock(&zswap_pools_lock); + + if (!pool) + pool = zswap_pool_create(type, compressor); + if (pool) ret = param_set_charp(s, kp); else ret = -EINVAL; + spin_lock(&zswap_pools_lock); + if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); From 542bcc549379e43c1de75a510ea4eb8b9badd918 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:24 -0700 Subject: [PATCH 026/705] lan78xx: use skb_cow_head() to deal with cloned skbs commit d4ca73591916b760478d2b04334d5dcadc028e9c upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Eric Dumazet Cc: James Hughes Cc: Woojung Huh Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f33460cec79f..9c257ffedb15 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2419,14 +2419,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } if (lan78xx_linearize(skb) < 0) From ab4fd7a2ddc5d558b616cf09ff6fb5de1cafb7e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:23 -0700 Subject: [PATCH 027/705] sr9700: use skb_cow_head() to deal with cloned skbs commit d532c1082f68176363ed766d09bf187616e282fe upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: c9b37458e956 ("USB2NET : SR9700 : One chip USB 1.1 USB2NET SR9700Device Driver Support") Signed-off-by: Eric Dumazet Cc: James Hughes Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/sr9700.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 4a1e9c489f1f..aadfe1d1c37e 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb, len = skb->len; - if (skb_headroom(skb) < SR_TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SR_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } __skb_push(skb, SR_TX_OVERHEAD); From 7c5015409befbaa7521cea61b4759e1dfb686cd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:21 -0700 Subject: [PATCH 028/705] smsc75xx: use skb_cow_head() to deal with cloned skbs commit b7c6d2675899cfff0180412c63fc9cbd5bacdb4d upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver") Signed-off-by: Eric Dumazet Cc: James Hughes Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc75xx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 9af9799935db..4cb9b11a545a 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2205,13 +2205,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) { - struct sk_buff *skb2 = - skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS; From 135f98084eacc6c359b35899efb50363a5d3269a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Apr 2017 09:59:22 -0700 Subject: [PATCH 029/705] cx82310_eth: use skb_cow_head() to deal with cloned skbs commit a9e840a2081ed28c2b7caa6a9a0041c950b3c37d upstream. We need to ensure there is enough headroom to push extra header, but we also need to check if we are allowed to change headers. skb_cow_head() is the proper helper to deal with this. Fixes: cc28a20e77b2 ("introduce cx82310_eth: Conexant CX82310-based ADSL router USB ethernet driver") Signed-off-by: Eric Dumazet Cc: James Hughes Signed-off-by: David S. Miller Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cx82310_eth.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index e221bfcee76b..947bea81d924 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb, { int len = skb->len; - if (skb_headroom(skb) < 2) { - struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags); + if (skb_cow_head(skb, 2)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } skb_push(skb, 2); From 66bb6c2c4445d39f4086637d425da77dcc3364ae Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 11 Jan 2018 17:01:36 +0000 Subject: [PATCH 030/705] xhci: Fix ring leak in failure path of xhci_alloc_virt_device() This is a stable-only fix for the backport of commit 5d9b70f7d52e ("xhci: Don't add a virt_dev to the devs array before it's fully allocated"). In branches that predate commit c5628a2af83a ("xhci: remove endpoint ring cache") there is an additional failure path in xhci_alloc_virt_device() where ring cache allocation fails, in which case we need to free the ring allocated for endpoint 0. Signed-off-by: Ben Hutchings Cc: Mathias Nyman --- drivers/usb/host/xhci-mem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 82eea55a7b5c..3b7d69ca83be 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1086,7 +1086,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, return 1; fail: - + if (dev->eps[0].ring) + xhci_ring_free(xhci, dev->eps[0].ring); if (dev->in_ctx) xhci_free_container_ctx(xhci, dev->in_ctx); if (dev->out_ctx) From fe71f34fbf83d0de41e5725cc4988e238d452d11 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 9 Jan 2018 13:40:41 -0800 Subject: [PATCH 031/705] 8021q: fix a memory leak for VLAN 0 device [ Upstream commit 78bbb15f2239bc8e663aa20bbe1987c91a0b75f6 ] A vlan device with vid 0 is allow to creat by not able to be fully cleaned up by unregister_vlan_dev() which checks for vlan_id!=0. Also, VLAN 0 is probably not a valid number and it is kinda "reserved" for HW accelerating devices, but it is probably too late to reject it from creation even if makes sense. Instead, just remove the check in unregister_vlan_dev(). Reported-by: Dmitry Vyukov Fixes: ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") Cc: Vlad Yasevich Cc: Ben Hutchings Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4a47074d1d7f..c8ea3cf9db85 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); From ca5681b723d39e85cbb2f05523a08ef2ee3a8086 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Mon, 25 Dec 2017 10:43:49 +0800 Subject: [PATCH 032/705] ip6_tunnel: disable dst caching if tunnel is dual-stack [ Upstream commit 23263ec86a5f44312d2899323872468752324107 ] When an ip6_tunnel is in mode 'any', where the transport layer protocol can be either 4 or 41, dst_cache must be disabled. This is because xfrm policies might apply to only one of the two protocols. Caching dst would cause xfrm policies for one protocol incorrectly used for the other. Signed-off-by: Eli Cooper Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 11d22d642488..131e6aa954bc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1080,10 +1080,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } - } else if (!(t->parms.flags & - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { - /* enable the cache only only if the routing decision does - * not depend on the current inner header value + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value */ use_cache = true; } From 61196a67cac4faa27d5e8f765b468bc25822f84d Mon Sep 17 00:00:00 2001 From: Andrii Vladyka Date: Thu, 4 Jan 2018 13:09:17 +0200 Subject: [PATCH 033/705] net: core: fix module type in sock_diag_bind [ Upstream commit b8fd0823e0770c2d5fdbd865bccf0d5e058e5287 ] Use AF_INET6 instead of AF_INET in IPv6-related code path Signed-off-by: Andrii Vladyka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 6b10573cc9fa..d1d9faf3046b 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -295,7 +295,7 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + NETLINK_SOCK_DIAG, AF_INET6); break; } return 0; From cebb382931c4fb341162eae80b696650260e4a2b Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 2 Jan 2018 19:44:34 +0000 Subject: [PATCH 034/705] RDS: Heap OOB write in rds_message_alloc_sgs() [ Upstream commit c095508770aebf1b9218e77026e48345d719b17c ] When args->nr_local is 0, nr_pages gets also 0 due some size calculation via rds_rm_size(), which is later used to allocate pages for DMA, this bug produces a heap Out-Of-Bound write access to a specific memory region. Signed-off-by: Mohamed Ghannam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index de8496e60735..4880f9a2d356 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -524,6 +524,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + if (args->nr_local == 0) + return -EINVAL; + /* figure out the number of pages in the vector */ for (i = 0; i < args->nr_local; i++) { if (copy_from_user(&vec, &local_vec[i], From ce31b6ac1111096ae9bb0b45f4ba564a909bb366 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Wed, 3 Jan 2018 21:06:06 +0000 Subject: [PATCH 035/705] RDS: null pointer dereference in rds_atomic_free_op [ Upstream commit 7d11f77f84b27cef452cee332f4e469503084737 ] set rm->atomic.op_active to 0 when rds_pin_pages() fails or the user supplied address is invalid, this prevents a NULL pointer usage in rds_atomic_free_op() Signed-off-by: Mohamed Ghannam Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4880f9a2d356..f6027f41cd34 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -876,6 +876,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, err: if (page) put_page(page); + rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); return ret; From eb2f80e099d47a7b6e0d3acc87b59feaa0fa7364 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 3 Jan 2018 20:09:49 +0300 Subject: [PATCH 036/705] sh_eth: fix TSU resource handling [ Upstream commit dfe8266b8dd10e12a731c985b725fcf7f0e537f0 ] When switching the driver to the managed device API, I managed to break the case of a dual Ether devices sharing a single TSU: the 2nd Ether port wouldn't probe. Iwamatsu-san has tried to fix this but his patch was buggy and he then dropped the ball... The solution is to limit calling devm_request_mem_region() to the first of the two ports sharing the same TSU, so devm_ioremap_resource() can't be used anymore for the TSU resource... Fixes: d5e07e69218f ("sh_eth: use managed device API") Reported-by: Nobuhiro Iwamatsu Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 2140dedab712..dc39958537c7 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3087,10 +3087,29 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... + */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; From 7f4226ffcba0afe7a352e6fed7f095eb9730584e Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 4 Jan 2018 21:06:49 +0300 Subject: [PATCH 037/705] sh_eth: fix SH7757 GEther initialization [ Upstream commit 5133550296d43236439494aa955bfb765a89f615 ] Renesas SH7757 has 2 Fast and 2 Gigabit Ether controllers, while the 'sh_eth' driver can only reset and initialize TSU of the first controller pair. Shimoda-san tried to solve that adding the 'needs_init' member to the 'struct sh_eth_plat_data', however the platform code still never sets this flag. I think that we can infer this information from the 'devno' variable (set to 'platform_device::id') and reset/init the Ether controller pair only for an even 'devno'; therefore 'sh_eth_plat_data::needs_init' can be removed... Fixes: 150647fb2c31 ("net: sh_eth: change the condition of initialization") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- include/linux/sh_eth.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index dc39958537c7..b6816ae00b7a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3116,8 +3116,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index f2e27e078362..01b3778ba6da 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -16,7 +16,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif From 6f237183c7cad875e5ddc7c32641094117b036c9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 3 Jan 2018 16:46:29 +0100 Subject: [PATCH 038/705] net: stmmac: enable EEE in MII, GMII or RGMII only [ Upstream commit 879626e3a52630316d817cbda7cec9a5446d1d82 ] Note in the databook - Section 4.4 - EEE : " The EEE feature is not supported when the MAC is configured to use the TBI, RTBI, SMII, RMII or SGMII single PHY interface. Even if the MAC supports multiple PHY interfaces, you should activate the EEE mode only when the MAC is operating with GMII, MII, or RGMII interface." Applying this restriction solves a stability issue observed on Amlogic gxl platforms operating with RMII interface and the internal PHY. Fixes: 83bf79b6bb64 ("stmmac: disable at run-time the EEE if not supported") Signed-off-by: Jerome Brunet Tested-by: Arnaud Patard Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++++ include/linux/phy.h | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index adf61a7b1b01..98bbb91336e4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -280,8 +280,14 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) bool stmmac_eee_init(struct stmmac_priv *priv) { unsigned long flags; + int interface = priv->plat->interface; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ diff --git a/include/linux/phy.h b/include/linux/phy.h index a04d69ab7c34..867110c9d707 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -683,6 +683,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_mode_is_rgmii - Convenience function for testing if a + * PHY interface mode is RGMII (all variants) + * @mode: the phy_interface_t enum + */ +static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) +{ + return mode >= PHY_INTERFACE_MODE_RGMII && + mode <= PHY_INTERFACE_MODE_RGMII_TXID; +}; + /** * phy_interface_is_rgmii - Convenience function for testing if a PHY interface * is RGMII (all variants) From dde00c92245d4f002a7c9adbea7639c74dc656e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jan 2018 03:45:49 -0800 Subject: [PATCH 039/705] ipv6: fix possible mem leaks in ipv6_make_skb() [ Upstream commit 862c03ee1deb7e19e0f9931682e0294ecd1fcaf9 ] ip6_setup_cork() might return an error, while memory allocations have been done and must be rolled back. Fixes: 6422398c2ab0 ("ipv6: introduce ipv6_make_skb") Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Reported-by: Mike Maloney Acked-by: Mike Maloney Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 506efba33a89..388584b8ff31 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1800,9 +1800,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.opt = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); - if (err) + if (err) { + ip6_cork_release(&cork, &v6_cork); return ERR_PTR(err); - + } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; From 16d5b481d098c9b9ceba5b4c897a0df9c97b34cf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Dec 2017 10:02:52 -0800 Subject: [PATCH 040/705] ethtool: do not print warning for applications using legacy API [ Upstream commit 71891e2dab6b55a870f8f7735e44a2963860b5c6 ] In kernel log ths message appears on every boot: "warning: `NetworkChangeNo' uses legacy ethtool link settings API, link modes are only partially reported" When ethtool link settings API changed, it started complaining about usages of old API. Ironically, the original patch was from google but the application using the legacy API is chrome. Linux ABI is fixed as much as possible. The kernel must not break it and should not complain about applications using legacy API's. This patch just removes the warning since using legacy API's in Linux is perfectly acceptable. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Signed-off-by: Stephen Hemminger Signed-off-by: David Decotigny Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e9989b835a66..7913771ec474 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -742,15 +742,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, @@ -777,10 +768,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; From e2b825e8de16803750bc5031db95bb20b037f426 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Dec 2017 08:57:35 +0100 Subject: [PATCH 041/705] mlxsw: spectrum_router: Fix NULL pointer deref [ Upstream commit 8764a8267b128405cf383157d5e9a4a3735d2409 ] When we remove the neighbour associated with a nexthop we should always refuse to write the nexthop to the adjacency table. Regardless if it is already present in the table or not. Otherwise, we risk dereferencing the NULL pointer that was set instead of the neighbour. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9e31a3390154..8aa91ddff287 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1328,9 +1328,9 @@ set_trap: static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } From b28394cbb4022db49e242e85e25110572b146b68 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Dec 2017 07:48:51 +0200 Subject: [PATCH 042/705] net/sched: Fix update of lastuse in act modules implementing stats_update [ Upstream commit 3bb23421a504f01551b7cb9dff0e41dbf16656b0 ] We need to update lastuse to to the most updated value between what is already set and the new value. If HW matching fails, i.e. because of an issue, the stats are not updated but it could be that software did match and updated lastuse. Fixes: 5712bf9c5c30 ("net/sched: act_mirred: Use passed lastuse argument") Fixes: 9fea47d93bcc ("net/sched: act_gact: Update statistics when offloaded to hardware") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_gact.c | 2 +- net/sched/act_mirred.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e0aa30f83c6c..9617b42aaf20 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -161,7 +161,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6b07fba5770b..fc3650b06192 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -211,7 +211,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, From 3752d2fb9a6d3356ce2386e94a69ff40296272ce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 14:30:19 -0600 Subject: [PATCH 043/705] crypto: algapi - fix NULL dereference in crypto_remove_spawns() commit 9a00674213a3f00394f4e3221b88f2d21fc05789 upstream. syzkaller triggered a NULL pointer dereference in crypto_remove_spawns() via a program that repeatedly and concurrently requests AEADs "authenc(cmac(des3_ede-asm),pcbc-aes-aesni)" and hashes "cmac(des3_ede)" through AF_ALG, where the hashes are requested as "untested" (CRYPTO_ALG_TESTED is set in ->salg_mask but clear in ->salg_feat; this causes the template to be instantiated for every request). Although AF_ALG users really shouldn't be able to request an "untested" algorithm, the NULL pointer dereference is actually caused by a longstanding race condition where crypto_remove_spawns() can encounter an instance which has had spawn(s) "grabbed" but hasn't yet been registered, resulting in ->cra_users still being NULL. We probably should properly initialize ->cra_users earlier, but that would require updating many templates individually. For now just fix the bug in a simple way that can easily be backported: make crypto_remove_spawns() treat a NULL ->cra_users list as empty. Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index 1fad2a6b3bbb..5c098ffa7d3d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, spawn->alg = NULL; spawns = &inst->alg.cra_users; + + /* + * We may encounter an unregistered instance here, since + * an instance's spawns are set up prior to the instance + * being registered. An unregistered instance will have + * NULL ->cra_users.next, since ->cra_users isn't + * properly initialized until registration. But an + * unregistered instance cannot have any users, so treat + * it the same as ->cra_users being empty. + */ + if (spawns->next == NULL) + break; } } while ((spawns = crypto_more_spawns(alg, &stack, &top, &secondary_spawns))); From 553a8b8c8d87e0b10a04c4568f2c1c412e1fc3de Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 21 Dec 2017 15:35:11 +0100 Subject: [PATCH 044/705] rbd: set max_segments to USHRT_MAX commit 21acdf45f4958135940f0b4767185cf911d4b010 upstream. Commit d3834fefcfe5 ("rbd: bump queue_max_segments") bumped max_segments (unsigned short) to max_hw_sectors (unsigned int). max_hw_sectors is set to the number of 512-byte sectors in an object and overflows unsigned short for 32M (largest possible) objects, making the block layer resort to handing us single segment (i.e. single page or even smaller) bios in that case. Fixes: d3834fefcfe5 ("rbd: bump queue_max_segments") Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 24f4b544d270..e32badd26c8a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4511,7 +4511,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); From 431fd501aa3f1f6a6e44a2022f7be18d8dd60b32 Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Mon, 1 Jan 2018 10:04:47 +0800 Subject: [PATCH 045/705] x86/microcode/intel: Extend BDW late-loading with a revision check commit b94b7373317164402ff7728d10f7023127a02b60 upstream. Instead of blacklisting all model 79 CPUs when attempting a late microcode loading, limit that only to CPUs with microcode revisions < 0x0b000021 because only on those late loading may cause a system hang. For such processors either: a) a BIOS update which might contain a newer microcode revision or b) the early microcode loading method should be considered. Processors with revisions 0x0b000021 or higher will not experience such hangs. For more details, see erratum BDF90 in document #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family Specification Update) from September 2017. [ bp: Heavily massage commit message and pr_* statements. ] Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79") Signed-off-by: Jia Zhang Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Tony Luck Cc: x86-ml Link: http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue.zj@alibaba-inc.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 13dbcc0f9d03..ac3e636ad586 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1051,8 +1051,17 @@ static bool is_blacklisted(unsigned int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { - pr_err_once("late loading on model 79 is disabled.\n"); + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 + * may result in a system hang. This behavior is documented in item + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + */ + if (c->x86 == 6 && + c->x86_model == INTEL_FAM6_BROADWELL_X && + c->x86_mask == 0x01 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); return true; } From 012df71d2980d9ddf8039da09b2d5741189da8f1 Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Wed, 10 Jan 2018 10:12:03 -0800 Subject: [PATCH 046/705] KVM: x86: Add memory barrier on vmcs field lookup commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream. This adds a memory barrier when performing a lookup into the vmcs_field_to_offset_table. This is related to CVE-2017-5753. Signed-off-by: Andrew Honig Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d8582902a783..ab6605425497 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -857,8 +857,16 @@ static inline short vmcs_field_to_offset(unsigned long field) { BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || - vmcs_field_to_offset_table[field] == 0) + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + return -ENOENT; + + /* + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a + * generic mechanism. + */ + asm("lfence"); + + if (vmcs_field_to_offset_table[field] == 0) return -ENOENT; return vmcs_field_to_offset_table[field]; From 08a7525811043b1d4e085fa028ee6e9fe89bd7cf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2018 12:40:04 +0300 Subject: [PATCH 047/705] drm/vmwgfx: Potential off by one in vmw_view_add() commit 0d9cac0ca0429830c40fe1a4e50e60f6221fd7b6 upstream. The vmw_view_cmd_to_type() function returns vmw_view_max (3) on error. It's one element beyond the end of the vmw_view_cotables[] table. My read on this is that it's possible to hit this failure. header->id comes from vmw_cmd_check() and it's a user controlled number between 1040 and 1225 so we can hit that error. But I don't have the hardware to test this code. Fixes: d80efd5cb3de ("drm/vmwgfx: Initial DX support") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index fefb9d995d2c..81f5a552e32f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2729,6 +2729,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, } view_type = vmw_view_cmd_to_type(header->id); + if (view_type == vmw_view_max) + return -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, user_surface_converter, From ec61bafb2abd42a148d41143570732052a577fc0 Mon Sep 17 00:00:00 2001 From: Lepton Wu Date: Fri, 12 Jan 2018 13:42:56 -0800 Subject: [PATCH 048/705] kaiser: Set _PAGE_NX only if supported This finally resolve crash if loaded under qemu + haxm. Haitao Shan pointed out that the reason of that crash is that NX bit get set for page tables. It seems we missed checking if _PAGE_NX is supported in kaiser_add_user_map Link: https://www.spinics.net/lists/kernel/msg2689835.html Reviewed-by: Guenter Roeck Signed-off-by: Lepton Wu Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 8f8e5e03d083..a8ade08a9bf5 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -197,6 +197,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size, * requires that not to be #defined to 0): so mask it off here. */ flags &= ~_PAGE_GLOBAL; + if (!(__supported_pte_mask & _PAGE_NX)) + flags &= ~_PAGE_NX; for (; address < end_addr; address += PAGE_SIZE) { target_address = get_pa_from_mapping(address); From 748e1b6281f5128c5ef8f9296761a232558c20d8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 27 Oct 2017 12:32:59 -0700 Subject: [PATCH 049/705] iscsi-target: Make TASK_REASSIGN use proper se_cmd->cmd_kref commit ae072726f6109bb1c94841d6fb3a82dde298ea85 upstream. Since commit 59b6986dbf fixed a potential NULL pointer dereference by allocating a se_tmr_req for ISCSI_TM_FUNC_TASK_REASSIGN, the se_tmr_req is currently leaked by iscsit_free_cmd() because no iscsi_cmd->se_cmd.se_tfo was associated. To address this, treat ISCSI_TM_FUNC_TASK_REASSIGN like any other TMR and call transport_init_se_cmd() + target_get_sess_cmd() to setup iscsi_cmd->se_cmd.se_tfo with se_cmd->cmd_kref of 2. This will ensure normal release operation once se_cmd->cmd_kref reaches zero and target_release_cmd_kref() is invoked, se_tmr_req will be released via existing target_free_cmd_mem() and core_tmr_release_req() code. Reported-by: Donald White Cc: Donald White Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 72e926d9868f..04d2b6e25503 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1940,7 +1940,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct iscsi_tmr_req *tmr_req; struct iscsi_tm *hdr; int out_of_order_cmdsn = 0, ret; - bool sess_ref = false; u8 function, tcm_function = TMR_UNKNOWN; hdr = (struct iscsi_tm *) buf; @@ -1982,18 +1981,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, buf); } + transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, + conn->sess->se_sess, 0, DMA_NONE, + TCM_SIMPLE_TAG, cmd->sense_buffer + 2); + + target_get_sess_cmd(&cmd->se_cmd, true); + /* * TASK_REASSIGN for ERL=2 / connection stays inside of * LIO-Target $FABRIC_MOD */ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, - conn->sess->se_sess, 0, DMA_NONE, - TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - - target_get_sess_cmd(&cmd->se_cmd, true); - sess_ref = true; - switch (function) { case ISCSI_TM_FUNC_ABORT_TASK: tcm_function = TMR_ABORT_TASK; @@ -2132,12 +2130,8 @@ attach: * For connection recovery, this is also the default action for * TMR TASK_REASSIGN. */ - if (sess_ref) { - pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(&cmd->se_cmd); - } - iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + target_put_sess_cmd(&cmd->se_cmd); return 0; } EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd); From 60c7a9cd5050378a7bfe385dc5feef9d406f22e1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 27 Oct 2017 22:19:26 -0800 Subject: [PATCH 050/705] target: Avoid early CMD_T_PRE_EXECUTE failures during ABORT_TASK commit 1c21a48055a67ceb693e9c2587824a8de60a217c upstream. This patch fixes bug where early se_cmd exceptions that occur before backend execution can result in use-after-free if/when a subsequent ABORT_TASK occurs for the same tag. Since an early se_cmd exception will have had se_cmd added to se_session->sess_cmd_list via target_get_sess_cmd(), it will not have CMD_T_COMPLETE set by the usual target_complete_cmd() backend completion path. This causes a subsequent ABORT_TASK + __target_check_io_state() to signal ABORT_TASK should proceed. As core_tmr_abort_task() executes, it will bring the outstanding se_cmd->cmd_kref count down to zero releasing se_cmd, after se_cmd has already been queued with error status into fabric driver response path code. To address this bug, introduce a CMD_T_PRE_EXECUTE bit that is set at target_get_sess_cmd() time, and cleared immediately before backend driver dispatch in target_execute_cmd() once CMD_T_ACTIVE is set. Then, check CMD_T_PRE_EXECUTE within __target_check_io_state() to determine when an early exception has occured, and avoid aborting this se_cmd since it will have already been queued into fabric driver response path code. Reported-by: Donald White Cc: Donald White Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tmr.c | 9 +++++++++ drivers/target/target_core_transport.c | 2 ++ include/target/target_core_base.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 27dd1e12f246..14bb2db5273c 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd, spin_unlock(&se_cmd->t_state_lock); return false; } + if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) { + if (se_cmd->scsi_status) { + pr_debug("Attempted to abort io tag: %llu early failure" + " status: 0x%02x\n", se_cmd->tag, + se_cmd->scsi_status); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + } if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { pr_debug("Attempted to abort io tag: %llu already shutdown," " skipping\n", se_cmd->tag); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 4c0782cb1e94..6f3eccf986c7 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1939,6 +1939,7 @@ void target_execute_cmd(struct se_cmd *cmd) } cmd->t_state = TRANSPORT_PROCESSING; + cmd->transport_state &= ~CMD_T_PRE_EXECUTE; cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); @@ -2592,6 +2593,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) ret = -ESHUTDOWN; goto out; } + se_cmd->transport_state |= CMD_T_PRE_EXECUTE; list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index eb3b23b6ec54..30f99ce4c6ce 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -493,6 +493,7 @@ struct se_cmd { #define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) +#define CMD_T_PRE_EXECUTE (1 << 12) spinlock_t t_state_lock; struct kref cmd_kref; struct completion t_transport_stop_comp; From 28035366afe93f7bdb833a7867caccf4b7eda166 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Mar 2017 18:26:39 -0700 Subject: [PATCH 051/705] bpf: move fixup_bpf_calls() function commit e245c5c6a5656e4d61aa7bb08e9694fd6e5b2b9d upstream. no functional change. move fixup_bpf_calls() to verifier.c it's being refactored in the next patch Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Cc: Jiri Slaby [backported to 4.9 - gregkh] Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 54 ------------------------------------------- kernel/bpf/verifier.c | 54 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 72ea91df71c9..91a2d3752007 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -565,57 +565,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) list_add(&tl->list_node, &bpf_prog_types); } -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... - * - * this function is called after eBPF program passed verification - */ -static void fixup_bpf_calls(struct bpf_prog *prog) -{ - const struct bpf_func_proto *fn; - int i; - - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; - - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } - - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; - } - } -} - /* drop refcnt on maps used by eBPF program and free auxilary data */ static void free_used_maps(struct bpf_prog_aux *aux) { @@ -808,9 +757,6 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* fixup BPF_CALL->imm field */ - fixup_bpf_calls(prog); - /* eBPF program is ready to be JITed */ prog = bpf_prog_select_runtime(prog, &err); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d7eeebfafe8d..0d7a8e95bbfa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3362,6 +3362,57 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } +/* fixup insn->imm field of bpf_call instructions: + * if (insn->imm == BPF_FUNC_map_lookup_elem) + * insn->imm = bpf_map_lookup_elem - __bpf_call_base; + * else if (insn->imm == BPF_FUNC_map_update_elem) + * insn->imm = bpf_map_update_elem - __bpf_call_base; + * else ... + * + * this function is called after eBPF program passed verification + */ +static void fixup_bpf_calls(struct bpf_prog *prog) +{ + const struct bpf_func_proto *fn; + int i; + + for (i = 0; i < prog->len; i++) { + struct bpf_insn *insn = &prog->insnsi[i]; + + if (insn->code == (BPF_JMP | BPF_CALL)) { + /* we reach here when program has bpf_call instructions + * and it passed bpf_check(), means that + * ops->get_func_proto must have been supplied, check it + */ + BUG_ON(!prog->aux->ops->get_func_proto); + + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode + * to avoid conditional branch in + * interpeter for every normal call + * and to prevent accidental JITing by + * JIT compiler that doesn't support + * bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + BUG_ON(!fn->func); + insn->imm = fn->func - __bpf_call_base; + } + } +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -3463,6 +3514,9 @@ skip_full_check: /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); + if (ret == 0) + fixup_bpf_calls(env->prog); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ From f55093dccd3ac90f003698fae7ecd75cf2862179 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Mar 2017 18:26:40 -0700 Subject: [PATCH 052/705] bpf: refactor fixup_bpf_calls() commit 79741b3bdec01a8628368fbcfccc7d189ed606cb upstream. reduce indent and make it iterate over instructions similar to convert_ctx_accesses(). Also convert hard BUG_ON into soft verifier error. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Cc: Jiri Slaby [Backported to 4.9.y - gregkh] Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 75 ++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0d7a8e95bbfa..27370410c04d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3362,55 +3362,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... +/* fixup insn->imm field of bpf_call instructions * * this function is called after eBPF program passed verification */ -static void fixup_bpf_calls(struct bpf_prog *prog) +static int fixup_bpf_calls(struct bpf_verifier_env *env) { + struct bpf_prog *prog = env->prog; + struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; + const int insn_cnt = prog->len; int i; - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } - - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode to avoid + * conditional branch in the interpeter for every normal + * call and to prevent accidental JITing by JIT compiler + * that doesn't support bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; } + + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + if (!fn->func) { + verbose("kernel subsystem misconfigured func %d\n", + insn->imm); + return -EFAULT; + } + insn->imm = fn->func - __bpf_call_base; } + + return 0; } static void free_states(struct bpf_verifier_env *env) @@ -3515,7 +3510,7 @@ skip_full_check: ret = convert_ctx_accesses(env); if (ret == 0) - fixup_bpf_calls(env->prog); + ret = fixup_bpf_calls(env); if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); From a9bfac14cde2b481eeb0e64fbe15305df66ab32e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 7 Jan 2018 17:33:02 -0800 Subject: [PATCH 053/705] bpf: prevent out-of-bounds speculation commit b2157399cc9898260d6031c5bfe45fe137c1fbe7 upstream. Under speculation, CPUs may mis-predict branches in bounds checks. Thus, memory accesses under a bounds check may be speculated even if the bounds check fails, providing a primitive for building a side channel. To avoid leaking kernel data round up array-based maps and mask the index after bounds check, so speculated load with out of bounds index will load either valid value from the array or zero from the padded area. Unconditionally mask index for all array types even when max_entries are not rounded to power of 2 for root user. When map is created by unpriv user generate a sequence of bpf insns that includes AND operation to make sure that JITed code includes the same 'index & index_mask' operation. If prog_array map is created by unpriv user replace bpf_tail_call(ctx, map, index); with if (index >= max_entries) { index &= map->index_mask; bpf_tail_call(ctx, map, index); } (along with roundup to power 2) to prevent out-of-bounds speculation. There is secondary redundant 'if (index >= max_entries)' in the interpreter and in all JITs, but they can be optimized later if necessary. Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array) cannot be used by unpriv, so no changes there. That fixes bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)" on all architectures with and without JIT. v2->v3: Daniel noticed that attack potentially can be crafted via syscall commands without loading the program, so add masking to those paths as well. Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: Daniel Borkmann Cc: Jiri Slaby [ Backported to 4.9 - gregkh ] Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 2 ++ include/linux/bpf_verifier.h | 5 ++++- kernel/bpf/arraymap.c | 31 ++++++++++++++++++-------- kernel/bpf/verifier.c | 42 +++++++++++++++++++++++++++++++++--- 4 files changed, 67 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 97498be2ca2e..75ffd3b2149e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -43,6 +43,7 @@ struct bpf_map { u32 max_entries; u32 map_flags; u32 pages; + bool unpriv_array; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; @@ -189,6 +190,7 @@ struct bpf_prog_aux { struct bpf_array { struct bpf_map map; u32 elem_size; + u32 index_mask; /* 'ownership' of prog_array is claimed by the first program that * is going to use this map or by the first program which FD is stored * in the map to make sure that all callers and callees have the same diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4c4e9358c146..070fc49e39e2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -67,7 +67,10 @@ struct bpf_verifier_state_list { }; struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; bool seen; /* this insn was processed by the verifier */ }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f3721e150d94..bc57ead11b0f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -46,9 +46,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; u64 array_size; - u32 elem_size; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -63,11 +64,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); + max_entries = attr->max_entries; + index_mask = roundup_pow_of_two(max_entries) - 1; + + if (unpriv) + /* round up array size to nearest power of 2, + * since cpu will speculate within index_mask limits + */ + max_entries = index_mask + 1; + array_size = sizeof(*array); if (percpu) - array_size += (u64) attr->max_entries * sizeof(void *); + array_size += (u64) max_entries * sizeof(void *); else - array_size += (u64) attr->max_entries * elem_size; + array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ if (array_size >= U32_MAX - PAGE_SIZE) @@ -77,6 +87,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array = bpf_map_area_alloc(array_size); if (!array) return ERR_PTR(-ENOMEM); + array->index_mask = index_mask; + array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ array->map.map_type = attr->map_type; @@ -110,7 +122,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * index; + return array->value + array->elem_size * (index & array->index_mask); } /* Called from eBPF program */ @@ -122,7 +134,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return this_cpu_ptr(array->pptrs[index]); + return this_cpu_ptr(array->pptrs[index & array->index_mask]); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) @@ -142,7 +154,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); off += size; @@ -190,10 +202,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, return -EEXIST; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) - memcpy(this_cpu_ptr(array->pptrs[index]), + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else - memcpy(array->value + array->elem_size * index, + memcpy(array->value + + array->elem_size * (index & array->index_mask), value, map->value_size); return 0; } @@ -227,7 +240,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); off += size; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 27370410c04d..19c44cf59bb2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1187,7 +1187,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } } -static int check_call(struct bpf_verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; @@ -1238,6 +1238,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id) err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; + if (func_id == BPF_FUNC_tail_call) { + if (meta.map_ptr == NULL) { + verbose("verifier bug\n"); + return -EINVAL; + } + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; + } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@ -3019,7 +3026,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - err = check_call(env, insn->imm); + err = check_call(env, insn->imm, insn_idx); if (err) return err; @@ -3372,7 +3379,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; - int i; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + struct bpf_map *map_ptr; + int i, cnt, delta = 0; + for (i = 0; i < insn_cnt; i++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL)) @@ -3390,6 +3401,31 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ insn->imm = 0; insn->code |= BPF_X; + + /* instead of changing every JIT dealing with tail_call + * emit two extra insns: + * if (index >= max_entries) goto out; + * index &= array->index_mask; + * to avoid out-of-bounds cpu speculation + */ + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (!map_ptr->unpriv_array) + continue; + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, + map_ptr->max_entries, 2); + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, + container_of(map_ptr, + struct bpf_array, + map)->index_mask); + insn_buf[2] = *insn; + cnt = 3; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; continue; } From 820ef2a0e54c4bed27758e393d09157d0d48c94c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Jan 2018 23:25:05 +0100 Subject: [PATCH 054/705] bpf, array: fix overflow in max_entries and undefined behavior in index_mask commit bbeb6e4323dad9b5e0ee9f60c223dd532e2403b1 upstream. syzkaller tried to alloc a map with 0xfffffffd entries out of a userns, and thus unprivileged. With the recently added logic in b2157399cc98 ("bpf: prevent out-of-bounds speculation") we round this up to the next power of two value for max_entries for unprivileged such that we can apply proper masking into potentially zeroed out map slots. However, this will generate an index_mask of 0xffffffff, and therefore a + 1 will let this overflow into new max_entries of 0. This will pass allocation, etc, and later on map access we still enforce on the original attr->max_entries value which was 0xfffffffd, therefore triggering GPF all over the place. Thus bail out on overflow in such case. Moreover, on 32 bit archs roundup_pow_of_two() can also not be used, since fls_long(max_entries - 1) can result in 32 and 1UL << 32 in 32 bit space is undefined. Therefore, do this by hand in a 64 bit variable. This fixes all the issues triggered by syzkaller's reproducers. Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Reported-by: syzbot+b0efb8e572d01bce1ae0@syzkaller.appspotmail.com Reported-by: syzbot+6c15e9744f75f2364773@syzkaller.appspotmail.com Reported-by: syzbot+d2f5524fb46fd3b312ee@syzkaller.appspotmail.com Reported-by: syzbot+61d23c95395cc90dbc2b@syzkaller.appspotmail.com Reported-by: syzbot+0d363c942452cca68c01@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/arraymap.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index bc57ead11b0f..9a1e6ed7babc 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -49,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; - u64 array_size; + u64 array_size, mask64; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -65,13 +65,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; - index_mask = roundup_pow_of_two(max_entries) - 1; - if (unpriv) + /* On 32 bit archs roundup_pow_of_two() with max_entries that has + * upper most bit set in u32 space is undefined behavior due to + * resulting 1U << 32, so do it manually here in u64 space. + */ + mask64 = fls_long(max_entries - 1); + mask64 = 1ULL << mask64; + mask64 -= 1; + + index_mask = mask64; + if (unpriv) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ max_entries = index_mask + 1; + /* Check for overflows. */ + if (max_entries < attr->max_entries) + return ERR_PTR(-E2BIG); + } array_size = sizeof(*array); if (percpu) From 4abe275c2deeab918601ce6f520cf514b8738607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Fri, 29 Dec 2017 09:54:25 +0000 Subject: [PATCH 055/705] USB: serial: cp210x: add IDs for LifeScan OneTouch Verio IQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4307413256ac1e09b8f53e8715af3df9e49beec3 upstream. Add IDs for the OneTouch Verio IQ that comes with an embedded USB-to-serial converter. Signed-off-by: Diego Elio Pettenò Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 11ee55e080e5..c6d3d1c2d406 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -121,6 +121,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ From 11632d079e9e36a5bbfca8e581dba4db305eff10 Mon Sep 17 00:00:00 2001 From: Christian Holl Date: Wed, 3 Jan 2018 19:53:02 +0100 Subject: [PATCH 056/705] USB: serial: cp210x: add new device ID ELV ALC 8xxx commit d14ac576d10f865970bb1324d337e5e24d79aaf4 upstream. This adds the ELV ALC 8xxx Battery Charging device to the list of USB IDs of drivers/usb/serial/cp210x.c Signed-off-by: Christian Holl Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index c6d3d1c2d406..3178d8afb3e6 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -172,6 +172,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */ { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ From 9f6ca0ea7a7a21963e73d69be42026e5c5954bc4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 11 Jan 2018 14:47:40 +0100 Subject: [PATCH 057/705] usb: misc: usb3503: make sure reset is low for at least 100us commit b8626f1dc29d3eee444bfaa92146ec7b291ef41c upstream. When using a GPIO which is high by default, and initialize the driver in USB Hub mode, initialization fails with: [ 111.757794] usb3503 0-0008: SP_ILOCK failed (-5) The reason seems to be that the chip is not properly reset. Probe does initialize reset low, however some lines later the code already set it back high, which is not long enouth. Make sure reset is asserted for at least 100us by inserting a delay after initializing the reset pin during probe. Signed-off-by: Stefan Agner Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb3503.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c index 8e7737d7ac0a..03be5d574f23 100644 --- a/drivers/usb/misc/usb3503.c +++ b/drivers/usb/misc/usb3503.c @@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub) if (gpio_is_valid(hub->gpio_reset)) { err = devm_gpio_request_one(dev, hub->gpio_reset, GPIOF_OUT_INIT_LOW, "usb3503 reset"); + /* Datasheet defines a hardware reset to be at least 100us */ + usleep_range(100, 10000); if (err) { dev_err(dev, "unable to request GPIO %d as reset pin (%d)\n", From 435db24bb91f7dd16940d8ddedc3b5ca75547fb5 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Mon, 8 Jan 2018 15:46:41 -0600 Subject: [PATCH 058/705] USB: fix usbmon BUG trigger commit 46eb14a6e1585d99c1b9f58d0e7389082a5f466b upstream. Automated tests triggered this by opening usbmon and accessing the mmap while simultaneously resizing the buffers. This bug was with us since 2006, because typically applications only size the buffers once and thus avoid racing. Reported by Kirill A. Shutemov. Reported-by: Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mon/mon_bin.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 1a874a1f3890..80b37d214beb 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1002,7 +1002,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg break; case MON_IOCQ_RING_SIZE: + mutex_lock(&rp->fetch_lock); ret = rp->b_size; + mutex_unlock(&rp->fetch_lock); break; case MON_IOCT_RING_SIZE: @@ -1229,12 +1231,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long offset, chunk_idx; struct page *pageptr; + mutex_lock(&rp->fetch_lock); offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= rp->b_size) + if (offset >= rp->b_size) { + mutex_unlock(&rp->fetch_lock); return VM_FAULT_SIGBUS; + } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); + mutex_unlock(&rp->fetch_lock); vmf->page = pageptr; return 0; } From 6851ec74bfe44e8aa568fd00603759c4a2918752 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 17:00:06 -0700 Subject: [PATCH 059/705] usbip: remove kernel addresses from usb device and urb debug msgs commit e1346fd87c71a1f61de1fe476ec8df1425ac931c upstream. usbip_dump_usb_device() and usbip_dump_urb() print kernel addresses. Remove kernel addresses from usb device and urb debug msgs and improve the message content. Instead of printing parent device and bus addresses, print parent device and bus names. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index e24b24fa0f16..2a5d3180777d 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev) dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", udev->devnum, udev->devpath, usb_speed_string(udev->speed)); - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport); + pr_debug("tt hub ttport %d\n", udev->ttport); dev_dbg(dev, " "); for (i = 0; i < 16; i++) @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev) } pr_debug("\n"); - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus); - - dev_dbg(dev, - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n", - &udev->descriptor, udev->config, - udev->actconfig, udev->rawdescriptors); + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), + udev->bus->bus_name); dev_dbg(dev, "have_langid %d, string_langid %d\n", udev->have_langid, udev->string_langid); @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb) dev = &urb->dev->dev; - dev_dbg(dev, " urb :%p\n", urb); - dev_dbg(dev, " dev :%p\n", urb->dev); - usbip_dump_usb_device(urb->dev); dev_dbg(dev, " pipe :%08x ", urb->pipe); @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " status :%d\n", urb->status); dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer); dev_dbg(dev, " transfer_buffer_length:%d\n", urb->transfer_buffer_length); dev_dbg(dev, " actual_length :%d\n", urb->actual_length); - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet); if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) usbip_dump_usb_ctrlrequest( @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); dev_dbg(dev, " interval :%d\n", urb->interval); dev_dbg(dev, " error_count :%d\n", urb->error_count); - dev_dbg(dev, " context :%p\n", urb->context); - dev_dbg(dev, " complete :%p\n", urb->complete); } EXPORT_SYMBOL_GPL(usbip_dump_urb); From 86c8d58fc7538d0f44367f49d24568e925049c0d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 19:23:46 -0700 Subject: [PATCH 060/705] usbip: fix vudc_rx: harden CMD_SUBMIT path to handle malicious input commit b78d830f0049ef1966dc1e0ebd1ec2a594e2cf25 upstream. Harden CMD_SUBMIT path to handle malicious input that could trigger large memory allocations. Add checks to validate transfer_buffer_length and number_of_packets to protect against bad input requesting for unbounded memory allocations. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_rx.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c index e429b59f6f8a..d020e72b3122 100644 --- a/drivers/usb/usbip/vudc_rx.c +++ b/drivers/usb/usbip/vudc_rx.c @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc, urb_p->new = 1; urb_p->seqnum = pdu->base.seqnum; + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(urb_p->ep->desc); + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) { + dev_err(&udc->gadget.dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + ret = -EMSGSIZE; + goto free_urbp; + } + } + ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type); if (ret) { usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC); From 8ab8c6e6607a14ebd54319535b17e384d2872df9 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 19:23:47 -0700 Subject: [PATCH 061/705] usbip: vudc_tx: fix v_send_ret_submit() vulnerability to null xfer buffer commit 5fd77a3a0e408c23ab4002a57db980e46bc16e72 upstream. v_send_ret_submit() handles urb with a null transfer_buffer, when it replays a packet with potential malicious data that could contain a null buffer. Add a check for the condition when actual_length > 0 and transfer_buffer is null. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_tx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c index 234661782fa0..3ab4c86486a7 100644 --- a/drivers/usb/usbip/vudc_tx.c +++ b/drivers/usb/usbip/vudc_tx.c @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&udc->gadget.dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (urb_p->type == USB_ENDPOINT_XFER_ISOC) iovnum = 2 + urb->number_of_packets; else @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb_p); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; From c51d23dffc2e9ca05d611c86c440f9055541c62d Mon Sep 17 00:00:00 2001 From: Viktor Slavkovic Date: Mon, 8 Jan 2018 10:43:03 -0800 Subject: [PATCH 062/705] staging: android: ashmem: fix a race condition in ASHMEM_SET_SIZE ioctl commit 443064cb0b1fb4569fe0a71209da7625129fb760 upstream. A lock-unlock is missing in ASHMEM_SET_SIZE ioctl which can result in a race condition when mmap is called. After the !asma->file check, before setting asma->size, asma->file can be set in mmap. That would result in having different asma->size than the mapped memory size. Combined with ASHMEM_UNPIN ioctl and shrinker invocation, this can result in memory corruption. Signed-off-by: Viktor Slavkovic Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 2b770cb0c488..558a66b459fa 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -774,10 +774,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case ASHMEM_SET_SIZE: ret = -EINVAL; + mutex_lock(&ashmem_mutex); if (!asma->file) { ret = 0; asma->size = (size_t)arg; } + mutex_unlock(&ashmem_mutex); break; case ASHMEM_GET_SIZE: ret = asma->size; From 6aebc2670ebfdda0762a6b471fbf8ca18dcf44f2 Mon Sep 17 00:00:00 2001 From: Ben Seri Date: Fri, 8 Dec 2017 15:14:47 +0100 Subject: [PATCH 063/705] Bluetooth: Prevent stack info leak from the EFS element. commit 06e7e776ca4d36547e503279aeff996cbb292c16 upstream. In the function l2cap_parse_conf_rsp and in the function l2cap_parse_conf_req the following variable is declared without initialization: struct l2cap_conf_efs efs; In addition, when parsing input configuration parameters in both of these functions, the switch case for handling EFS elements may skip the memcpy call that will write to the efs variable: ... case L2CAP_CONF_EFS: if (olen == sizeof(efs)) memcpy(&efs, (void *)val, olen); ... The olen in the above if is attacker controlled, and regardless of that if, in both of these functions the efs variable would eventually be added to the outgoing configuration request that is being built: l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs); So by sending a configuration request, or response, that contains an L2CAP_CONF_EFS element, but with an element length that is not sizeof(efs) - the memcpy to the uninitialized efs variable can be avoided, and the uninitialized variable would be returned to the attacker (16 bytes). This issue has been assigned CVE-2017-1000410 Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: Ben Seri Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ffd09c1675d4..2bbca23a9d05 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3353,9 +3353,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_EFS: - remote_efs = 1; - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { + remote_efs = 1; memcpy(&efs, (void *) val, olen); + } break; case L2CAP_CONF_EWS: @@ -3574,16 +3575,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { memcpy(&efs, (void *)val, olen); - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); + } break; case L2CAP_CONF_FCS: From 3ba5d3a2cf40c4ebdc1f702af3b5dea405a6a11e Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 6 Jan 2018 00:56:44 +0800 Subject: [PATCH 064/705] uas: ignore UAS for Norelsys NS1068(X) chips commit 928afc85270753657b5543e052cc270c279a3fe9 upstream. The UAS mode of Norelsys NS1068(X) is reported to fail to work on several platforms with the following error message: xhci-hcd xhci-hcd.0.auto: ERROR Transfer event for unknown stream ring slot 1 ep 8 xhci-hcd xhci-hcd.0.auto: @00000000bf04a400 00000000 00000000 1b000000 01098001 And when trying to mount a partition on the disk the disk will disconnect from the USB controller, then after re-connecting the device will be offlined and not working at all. Falling back to USB mass storage can solve this problem, so ignore UAS function of this chip. Signed-off-by: Icenowy Zheng Acked-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 9f356f7cf7d5..719ec68ae309 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -156,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: Icenowy Zheng */ +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999, + "Norelsys", + "NS1068X", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Takeo Nakayama */ UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, "JMicron", From d598347989aa8873cebfd7d7b4d29d7aba21b878 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 11 Dec 2017 16:26:40 +0900 Subject: [PATCH 065/705] e1000e: Fix e1000_check_for_copper_link_ich8lan return value. commit 4110e02eb45ea447ec6f5459c9934de0a273fb91 upstream. e1000e_check_for_copper_link() and e1000_check_for_copper_link_ich8lan() are the two functions that may be assigned to mac.ops.check_for_link when phy.media_type == e1000_media_type_copper. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed the meaning of the return value of check_for_link for copper media but only adjusted the first function. This patch adjusts the second function likewise. Reported-by: Christian Hesse Reported-by: Gabriel C Link: https://bugzilla.kernel.org/show_bug.cgi?id=198047 Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Tested-by: Christian Hesse Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index f3aaca743ea3..8a48656a376b 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1364,6 +1364,9 @@ out: * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1379,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1611,10 +1614,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) From 4e6c2af2ba93ee8709695835920fc57148e4b397 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 5 Jan 2018 09:44:36 -0800 Subject: [PATCH 066/705] x86/Documentation: Add PTI description commit 01c9b17bf673b05bb401b76ec763e9730ccf1376 upstream. Add some details about how PTI works, what some of the downsides are, and how to debug it when things go wrong. Also document the kernel parameter: 'pti/nopti'. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Randy Dunlap Reviewed-by: Kees Cook Cc: Moritz Lipp Cc: Daniel Gruss Cc: Michael Schwarz Cc: Richard Fellner Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Hugh Dickins Cc: Andi Lutomirsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 21 ++-- Documentation/x86/pti.txt | 186 ++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 7 deletions(-) create mode 100644 Documentation/x86/pti.txt diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5d2676d043de..2b1d782eda6f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2763,8 +2763,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nojitter [IA-64] Disables jitter checking for ITC timers. - nopti [X86-64] Disable KAISER isolation of kernel from user. - no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page @@ -3327,11 +3325,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. pt. [PARIDE] See Documentation/blockdev/paride.txt. - pti= [X86_64] - Control KAISER user/kernel address space isolation: - on - enable - off - disable - auto - default setting + pti= [X86_64] Control Page Table Isolation of user and + kernel address spaces. Disabling this feature + removes hardening, but improves performance of + system calls and interrupts. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable to issues that PTI mitigates + + Not specifying this option is equivalent to pti=auto. + + nopti [X86_64] + Equivalent to pti=off pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt new file mode 100644 index 000000000000..d11eff61fc9a --- /dev/null +++ b/Documentation/x86/pti.txt @@ -0,0 +1,186 @@ +Overview +======== + +Page Table Isolation (pti, previously known as KAISER[1]) is a +countermeasure against attacks on the shared user/kernel address +space such as the "Meltdown" approach[2]. + +To mitigate this class of attacks, we create an independent set of +page tables for use only when running userspace applications. When +the kernel is entered via syscalls, interrupts or exceptions, the +page tables are switched to the full "kernel" copy. When the system +switches back to user mode, the user copy is used again. + +The userspace page tables contain only a minimal amount of kernel +data: only what is needed to enter/exit the kernel such as the +entry/exit functions themselves and the interrupt descriptor table +(IDT). There are a few strictly unnecessary things that get mapped +such as the first C function when entering an interrupt (see +comments in pti.c). + +This approach helps to ensure that side-channel attacks leveraging +the paging structures do not function when PTI is enabled. It can be +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. +Once enabled at compile-time, it can be disabled at boot with the +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). + +Page Table Management +===================== + +When PTI is enabled, the kernel manages two sets of page tables. +The first set is very similar to the single set which is present in +kernels without PTI. This includes a complete mapping of userspace +that the kernel can use for things like copy_to_user(). + +Although _complete_, the user portion of the kernel page tables is +crippled by setting the NX bit in the top level. This ensures +that any missed kernel->user CR3 switch will immediately crash +userspace upon executing its first instruction. + +The userspace page tables map only the kernel data needed to enter +and exit the kernel. This data is entirely contained in the 'struct +cpu_entry_area' structure which is placed in the fixmap which gives +each CPU's copy of the area a compile-time-fixed virtual address. + +For new userspace mappings, the kernel makes the entries in its +page tables like normal. The only difference is when the kernel +makes entries in the top (PGD) level. In addition to setting the +entry in the main kernel PGD, a copy of the entry is made in the +userspace page tables' PGD. + +This sharing at the PGD level also inherently shares all the lower +layers of the page tables. This leaves a single, shared set of +userspace page tables to manage. One PTE to lock, one set of +accessed bits, dirty bits, etc... + +Overhead +======== + +Protection against side-channel attacks is important. But, +this protection comes at a cost: + +1. Increased Memory Use + a. Each process now needs an order-1 PGD instead of order-0. + (Consumes an additional 4k per process). + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB + aligned so that it can be mapped by setting a single PMD + entry. This consumes nearly 2MB of RAM once the kernel + is decompressed, but no space in the kernel image itself. + +2. Runtime Cost + a. CR3 manipulation to switch between the page table copies + must be done at interrupt, syscall, and exception entry + and exit (it can be skipped when the kernel is interrupted, + though.) Moves to CR3 are on the order of a hundred + cycles, and are required at every entry and exit. + b. A "trampoline" must be used for SYSCALL entry. This + trampoline depends on a smaller set of resources than the + non-PTI SYSCALL entry code, so requires mapping fewer + things into the userspace page tables. The downside is + that stacks must be switched at entry time. + d. Global pages are disabled for all kernel structures not + mapped into both kernel and userspace page tables. This + feature of the MMU allows different processes to share TLB + entries mapping the kernel. Losing the feature means more + TLB misses after a context switch. The actual loss of + performance is very small, however, never exceeding 1%. + d. Process Context IDentifiers (PCID) is a CPU feature that + allows us to skip flushing the entire TLB when switching page + tables by setting a special bit in CR3 when the page tables + are changed. This makes switching the page tables (at context + switch, or kernel entry/exit) cheaper. But, on systems with + PCID support, the context switch code must flush both the user + and kernel entries out of the TLB. The user PCID TLB flush is + deferred until the exit to userspace, minimizing the cost. + See intel.com/sdm for the gory PCID/INVPCID details. + e. The userspace page tables must be populated for each new + process. Even without PTI, the shared kernel mappings + are created by copying top-level (PGD) entries into each + new process. But, with PTI, there are now *two* kernel + mappings: one in the kernel page tables that maps everything + and one for the entry/exit structures. At fork(), we need to + copy both. + f. In addition to the fork()-time copying, there must also + be an update to the userspace PGD any time a set_pgd() is done + on a PGD used to map userspace. This ensures that the kernel + and userspace copies always map the same userspace + memory. + g. On systems without PCID support, each CR3 write flushes + the entire TLB. That means that each syscall, interrupt + or exception flushes the TLB. + h. INVPCID is a TLB-flushing instruction which allows flushing + of TLB entries for non-current PCIDs. Some systems support + PCIDs, but do not support INVPCID. On these systems, addresses + can only be flushed from the TLB for the current PCID. When + flushing a kernel address, we need to flush all PCIDs, so a + single kernel address flush will require a TLB-flushing CR3 + write upon the next use of every PCID. + +Possible Future Work +==================== +1. We can be more careful about not actually writing to CR3 + unless its value is actually changed. +2. Allow PTI to be enabled/disabled at runtime in addition to the + boot-time switching. + +Testing +======== + +To test stability of PTI, the following test procedure is recommended, +ideally doing all of these in parallel: + +1. Set CONFIG_DEBUG_ENTRY=y +2. Run several copies of all of the tools/testing/selftests/x86/ tests + (excluding MPX and protection_keys) in a loop on multiple CPUs for + several minutes. These tests frequently uncover corner cases in the + kernel entry code. In general, old kernels might cause these tests + themselves to crash, but they should never crash the kernel. +3. Run the 'perf' tool in a mode (top or record) that generates many + frequent performance monitoring non-maskable interrupts (see "NMI" + in /proc/interrupts). This exercises the NMI entry/exit code which + is known to trigger bugs in code paths that did not expect to be + interrupted, including nested NMIs. Using "-c" boosts the rate of + NMIs, and using two -c with separate counters encourages nested NMIs + and less deterministic behavior. + + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done + +4. Launch a KVM virtual machine. +5. Run 32-bit binaries on systems supporting the SYSCALL instruction. + This has been a lightly-tested code path and needs extra scrutiny. + +Debugging +========= + +Bugs in PTI cause a few different signatures of crashes +that are worth noting here. + + * Failures of the selftests/x86 code. Usually a bug in one of the + more obscure corners of entry_64.S + * Crashes in early boot, especially around CPU bringup. Bugs + in the trampoline code or mappings cause these. + * Crashes at the first interrupt. Caused by bugs in entry_64.S, + like screwing up a page table switch. Also caused by + incorrectly mapping the IRQ handler entry code. + * Crashes at the first NMI. The NMI code is separate from main + interrupt handlers and can have bugs that do not affect + normal interrupts. Also caused by incorrectly mapping NMI + code. NMIs that interrupt the entry code must be very + careful and can be the cause of crashes that show up when + running perf. + * Kernel crashes at the first exit to userspace. entry_64.S + bugs, or failing to map some of the exit code. + * Crashes at first interrupt that interrupts userspace. The paths + in entry_64.S that return to userspace are sometimes separate + from the ones that return to the kernel. + * Double faults: overflowing the kernel stack because of page + faults upon page faults. Caused by touching non-pti-mapped + data in the entry code, or forgetting to switch to kernel + CR3 before calling into C functions which are not pti-mapped. + * Userspace segfaults early in boot, sometimes manifesting + as mount(8) failing to mount the rootfs. These have + tended to be TLB invalidation issues. Usually invalidating + the wrong PCID, or otherwise missing an invalidation. + +1. https://gruss.cc/files/kaiser.pdf +2. https://meltdownattack.com/meltdown.pdf From ef463981018e29a7d880556181335a1853c3abf5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:38 -0800 Subject: [PATCH 067/705] x86/cpu: Factor out application of forced CPU caps commit 8bf1ebca215c262e48c15a4a15f175991776f57f upstream. There are multiple call sites that apply forced CPU caps. Factor them into a helper. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 918e44772b04..4c652255bd06 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c) } } +static void apply_forced_caps(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -1086,10 +1096,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overridden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); @@ -1151,10 +1158,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); /* * On SMP, boot_cpu_data holds the common feature set between From c2cacde516a4cf675f63d84aad7cf1e14a093f4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:32 +0100 Subject: [PATCH 068/705] x86/cpufeatures: Make CPU bugs sticky commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream. There is currently no way to force CPU bug bits like CPU feature bits. That makes it impossible to set a bug bit once at boot and have it stick for all upcoming CPUs. Extend the force set/clear arrays to handle bug bits as well. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/cpu/common.c | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1d2b69fc0ceb..9ea67a04ff4f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8cb52ee3ade6..e40b19ca486e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern struct tss_struct doublefault_tss; -extern __u32 cpu_caps_cleared[NCAPINTS]; -extern __u32 cpu_caps_set[NCAPINTS]; +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4c652255bd06..ba9b6014a426 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -480,8 +480,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -710,7 +710,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) { int i; - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } From d88f601b9ac9d2b819bb9d947b272d1cf1c36665 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:33 +0100 Subject: [PATCH 069/705] x86/cpufeatures: Add X86_BUG_CPU_INSECURE commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream. Many x86 CPUs leak information to user space due to missing isolation of user space and kernel space page tables. There are many well documented ways to exploit that. The upcoming software migitation of isolating the user and kernel space page tables needs a misfeature flag so code can be made runtime conditional. Add the BUG bits which indicates that the CPU is affected and add a feature bit which indicates that the software migitation is enabled. Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be made later. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 454a37adb823..57bd52cd2a2c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,5 +316,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ba9b6014a426..8c81adc0b926 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -882,6 +882,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + /* Assume for now that ALL x86 CPUs are insecure */ + setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + fpu__init_system(c); } From 43fe95308d276bdfd133f5951cc25565e39982ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Jan 2018 15:27:34 +0100 Subject: [PATCH 070/705] x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN commit de791821c295cc61419a06fe5562288417d1bc58 upstream. Use the name associated with the particular attack which needs page table isolation for mitigation. Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Alan Cox Cc: Jiri Koshina Cc: Linus Torvalds Cc: Tim Chen Cc: Andi Lutomirski Cc: Andi Kleen Cc: Peter Zijlstra Cc: Paul Turner Cc: Tom Lendacky Cc: Greg KH Cc: Dave Hansen Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 57bd52cd2a2c..985dfd7b3c31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,6 +316,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8c81adc0b926..5ab4fd7f343f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -884,7 +884,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); /* Assume for now that ALL x86 CPUs are insecure */ - setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); fpu__init_system(c); } From 26323fb4d717e11a69484c6df02eeef90dba7ef2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 6 Jan 2018 11:49:23 +0000 Subject: [PATCH 071/705] x86/cpufeatures: Add X86_BUG_SPECTRE_V[12] commit 99c6fa2511d8a683e61468be91b83f85452115fa upstream. Add the bug bits for spectre v1/2 and force them unconditionally for all cpus. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/common.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 985dfd7b3c31..f364c8919eb8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -317,5 +317,7 @@ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5ab4fd7f343f..8339b4363fb9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -886,6 +886,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) /* Assume for now that ALL x86 CPUs are insecure */ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + fpu__init_system(c); } From 56eff367e071981bed4a75428993ad896baa69eb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 24 Oct 2016 19:38:43 +0200 Subject: [PATCH 072/705] x86/cpu: Merge bugs.c and bugs_64.c commit 62a67e123e058a67db58bc6a14354dd037bafd0a upstream. Should be easier when following boot paths. It probably is a left over from the x86 unification eons ago. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161024173844.23038-3-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/Makefile | 4 +--- arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/cpu/bugs_64.c | 33 --------------------------------- 3 files changed, 23 insertions(+), 40 deletions(-) delete mode 100644 arch/x86/kernel/cpu/bugs_64.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4a8697f7d4ef..33b63670bf09 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o +obj-y += bugs.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_X86_32) += bugs.o -obj-$(CONFIG_X86_64) += bugs_64.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0b6124315441..5d82f2ca4acf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include void __init check_bugs(void) { @@ -28,11 +30,13 @@ void __init check_bugs(void) #endif identify_boot_cpu(); -#ifndef CONFIG_SMP - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + +#ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. * @@ -48,4 +52,18 @@ void __init check_bugs(void) alternative_instructions(); fpu__init_check_bugs(); +#else /* CONFIG_X86_64 */ + alternative_instructions(); + + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); +#endif } diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c deleted file mode 100644 index a972ac4c7e7d..000000000000 --- a/arch/x86/kernel/cpu/bugs_64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * Copyright (C) 2000 SuSE - */ - -#include -#include -#include -#include -#include -#include -#include - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#if !defined(CONFIG_SMP) - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. - */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -} From 11ec2df9c02071a7c0a63a1febb53e76cdee56ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:00 +0100 Subject: [PATCH 073/705] sysfs/cpu: Add vulnerability folder commit 87590ce6e373d1a5401f6539f0c59ef92dd924a9 upstream. As the meltdown/spectre problem affects several CPU architectures, it makes sense to have common way to express whether a system is affected by a particular vulnerability or not. If affected the way to express the mitigation should be common as well. Create /sys/devices/system/cpu/vulnerabilities folder and files for meltdown, spectre_v1 and spectre_v2. Allow architectures to override the show function. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 16 +++++++ drivers/base/Kconfig | 3 ++ drivers/base/cpu.c | 48 +++++++++++++++++++ include/linux/cpu.h | 7 +++ 4 files changed, 74 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 498741737055..8b30a48b4c5e 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list Description: AArch64 CPU registers 'identification' directory exposes the CPU ID registers for identifying model and revision of the CPU. + +What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/meltdown + /sys/devices/system/cpu/vulnerabilities/spectre_v1 + /sys/devices/system/cpu/vulnerabilities/spectre_v2 +Date: Januar 2018 +Contact: Linux kernel mailing list +Description: Information about CPU vulnerabilities + + The files are named after the code names of CPU + vulnerabilities. The output of those files reflects the + state of the CPUs in the system. Possible output values: + + "Not affected" CPU is not affected by the vulnerability + "Vulnerable" CPU is affected and no mitigation in effect + "Mitigation: $M" CPU is affetcted and mitigation $M is in effect diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index d02e7c0f5bfd..0651010bba21 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES config GENERIC_CPU_AUTOPROBE bool +config GENERIC_CPU_VULNERABILITIES + bool + config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 4c28e1a09786..56b6c8508a89 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void) #endif } +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES + +ssize_t __weak cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + +static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, + &dev_attr_spectre_v1.attr, + &dev_attr_spectre_v2.attr, + NULL +}; + +static const struct attribute_group cpu_root_vulnerabilities_group = { + .name = "vulnerabilities", + .attrs = cpu_root_vulnerabilities_attrs, +}; + +static void __init cpu_register_vulnerabilities(void) +{ + if (sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_vulnerabilities_group)) + pr_err("Unable to register CPU vulnerabilities\n"); +} + +#else +static inline void cpu_register_vulnerabilities(void) { } +#endif + void __init cpu_dev_init(void) { if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) panic("Failed to register CPU subsystem"); cpu_dev_register_generic(); + cpu_register_vulnerabilities(); } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e571128ad99a..2f475ad89a0d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); +extern ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf); + extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, From 45a98824bd79b1cf969beadb6288438b66082f17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:01 +0100 Subject: [PATCH 074/705] x86/cpu: Implement CPU vulnerabilites sysfs functions commit 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 upstream. Implement the CPU vulnerabilty show functions for meltdown, spectre_v1 and spectre_v2. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index da8156fd3d58..447edeb0cfcb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -64,6 +64,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5d82f2ca4acf..cd46f9039119 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include @@ -67,3 +68,31 @@ void __init check_bugs(void) set_memory_4k((unsigned long)__va(0), 1); #endif } + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); + if (boot_cpu_has(X86_FEATURE_KAISER)) + return sprintf(buf, "Mitigation: PTI\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} +#endif From abcc3e5f0079b850dc4e343f53de1476ac6f5e5c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:21 -0600 Subject: [PATCH 075/705] x86/cpu/AMD: Make LFENCE a serializing instruction commit e4d0e84e490790798691aaa0f2e598637f1867ec upstream. To aid in speculation control, make LFENCE a serializing instruction since it has less overhead than MFENCE. This is done by setting bit 1 of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not have this MSR. For these families, the LFENCE instruction is already serializing. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/amd.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b601ddac5719..29a554e5dff6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -330,6 +330,8 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2b4cf04239b6..8b5b19d38182 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -782,6 +782,16 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + /* + * A serializing LFENCE has less overhead than MFENCE, so + * use it for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_F10H_DECFG, + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } From 9c5e750c8e84bea3cacd652a8b20fbdd92998ecc Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:32 -0600 Subject: [PATCH 076/705] x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f upstream. With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference to MFENCE_RDTSC. However, since the kernel could be running under a hypervisor that does not support writing that MSR, read the MSR back and verify that the bit has been set successfully. If the MSR can be read and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the MFENCE_RDTSC feature. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net Signed-off-by: Razvan Ghitulete Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 29a554e5dff6..b11c4c072df8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -332,6 +332,7 @@ #define MSR_FAM10H_NODE_ID 0xc001100c #define MSR_F10H_DECFG 0xc0011029 #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8b5b19d38182..1b89f0c4251e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -782,6 +782,9 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + unsigned long long val; + int ret; + /* * A serializing LFENCE has less overhead than MFENCE, so * use it for execution serialization. On families which @@ -792,8 +795,19 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + /* + * Verify that the MSR write was successful (could be running + * under a hypervisor) and only then assume that LFENCE is + * serializing. + */ + ret = rdmsrl_safe(MSR_F10H_DECFG, &val); + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } else { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } /* From 5ddd318a4715f4806aba256f33db1f0f3ab043db Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 9 Jan 2018 15:02:51 +0000 Subject: [PATCH 077/705] sysfs/cpu: Fix typos in vulnerability documentation commit 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 upstream. Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 8b30a48b4c5e..dfd56ec7a850 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -355,7 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 -Date: Januar 2018 +Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities @@ -365,4 +365,4 @@ Description: Information about CPU vulnerabilities "Not affected" CPU is not affected by the vulnerability "Vulnerable" CPU is affected and no mitigation in effect - "Mitigation: $M" CPU is affetcted and mitigation $M is in effect + "Mitigation: $M" CPU is affected and mitigation $M is in effect From 194dc04770f5d29b707832a5a71c30fffe2af582 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Jan 2018 12:28:16 +0100 Subject: [PATCH 078/705] x86/alternatives: Fix optimize_nops() checking commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca upstream. The alternatives code checks only the first byte whether it is a NOP, but with NOPs in front of the payload and having actual instructions after it breaks the "optimized' test. Make sure to scan all bytes before deciding to optimize the NOPs in there. Reported-by: David Woodhouse Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Dave Hansen Cc: Andi Kleen Cc: Andrew Lutomirski Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5cb272a7a5a3..10d5a3d6affc 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -340,9 +340,12 @@ done: static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; + int i; - if (instr[0] != 0x90) - return; + for (i = 0; i < a->padlen; i++) { + if (instr[i] != 0x90) + return; + } local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); From 91b7e5cdc80a3b684154cf0983f22d22ec9b29e5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Jan 2018 14:37:05 +0000 Subject: [PATCH 079/705] x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream. Where an ALTERNATIVE is used in the middle of an inline asm block, this would otherwise lead to the following instruction being appended directly to the trailing ".popsection", and a failed compile. Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: ak@linux.intel.com Cc: Tim Chen Cc: Peter Zijlstra Cc: Paul Turner Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index d4aea31eec03..deca9b9c7923 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. From 00bcb5ada638d884818aad7d46f90501bac761e9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 17 Sep 2017 09:03:50 -0700 Subject: [PATCH 080/705] x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream. Otherwise we might have the PCID feature bit set during cpu_init(). This is just for robustness. I haven't seen any actual bugs here. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels") Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 -------- arch/x86/kernel/cpu/common.c | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cd46f9039119..cb6b4f9d0b7a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,14 +22,6 @@ void __init check_bugs(void) { -#ifdef CONFIG_X86_32 - /* - * Regardless of whether PCID is enumerated, the SDM says - * that it can't be enabled in 32-bit mode. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); -#endif - identify_boot_cpu(); if (!IS_ENABLED(CONFIG_SMP)) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8339b4363fb9..7b9ae04ddf5d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -890,6 +890,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); fpu__init_system(c); + +#ifdef CONFIG_X86_32 + /* + * Regardless of whether PCID is enumerated, the SDM says + * that it can't be enabled in 32-bit mode. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); +#endif } void __init early_cpu_init(void) From 35aee626fa6311c3942fedd50c50f6748922e89f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 12:04:44 -0600 Subject: [PATCH 081/705] objtool, modules: Discard objtool annotation sections for modules commit e390f9a9689a42f477a6073e2e7df530a4c1b740 upstream. The '__unreachable' and '__func_stack_frame_non_standard' sections are only used at compile time. They're discarded for vmlinux but they should also be discarded for modules. Since this is a recurring pattern, prefix the section names with ".discard.". It's a nice convention and vmlinux.lds.h already discards such sections. Also remove the 'a' (allocatable) flag from the __unreachable section since it doesn't make sense for a discarded section. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Jessica Yu Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble Signed-off-by: Ingo Molnar [dwmw2: Remove the unreachable part in backporting since it's not here yet] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/frame.h | 2 +- scripts/mod/modpost.c | 1 + scripts/module-common.lds | 5 ++++- tools/objtool/builtin-check.c | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/frame.h b/include/linux/frame.h index e6baaba3f1ae..d772c61c31da 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -11,7 +11,7 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(__func_stack_frame_non_standard) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func #else /* !CONFIG_STACK_VALIDATION */ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index bd8349759095..845eb9b800f3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -838,6 +838,7 @@ static const char *const section_white_list[] = ".cmem*", /* EZchip */ ".fmt_slot*", /* EZchip */ ".gnu.lto*", + ".discard.*", NULL }; diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 53234e85192a..9b6e246a45d0 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -4,7 +4,10 @@ * combine them automatically. */ SECTIONS { - /DISCARD/ : { *(.discard) } + /DISCARD/ : { + *(.discard) + *(.discard.*) + } __ksymtab 0 : { *(SORT(___ksymtab+*)) } __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index b8dadb050d2b..f8b6cf5fec87 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -1229,7 +1229,7 @@ int cmd_check(int argc, const char **argv) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); file.rodata = find_section_by_name(file.elf, ".rodata"); file.ignore_unreachables = false; file.c_file = find_section_by_name(file.elf, ".comment"); From 3adb52ab29760624cd59ea0579317a24c4827e9f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 11 Jan 2018 21:46:23 +0000 Subject: [PATCH 082/705] objtool: Detect jumps to retpoline thunks commit 39b735332cb8b33a27c28592d969e4016c86c3ea upstream. A direct jump to a retpoline thunk is really an indirect jump in disguise. Change the objtool instruction type accordingly. Objtool needs to know where indirect branches are so it can detect switch statement jump tables. This fixes a bunch of warnings with CONFIG_RETPOLINE like: arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame ... Signed-off-by: Josh Poimboeuf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk [dwmw2: Applies to tools/objtool/builtin-check.c not check.c] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- tools/objtool/builtin-check.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index f8b6cf5fec87..293a23e27968 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -382,6 +382,13 @@ static int add_jump_destinations(struct objtool_file *file) } else if (rela->sym->sec->idx) { dest_sec = rela->sym->sec; dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else if (strstr(rela->sym->name, "_indirect_thunk_")) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. + */ + insn->type = INSN_JUMP_DYNAMIC; + continue; } else { /* sibling call */ insn->jump_dest = 0; From 4d8bd3e2f6b1e45d8e080030f2554476e7c18da7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 11 Jan 2018 21:46:24 +0000 Subject: [PATCH 083/705] objtool: Allow alternatives to be ignored commit 258c76059cece01bebae098e81bacb1af2edad17 upstream. Getting objtool to understand retpolines is going to be a bit of a challenge. For now, take advantage of the fact that retpolines are patched in with alternatives. Just read the original (sane) non-alternative instruction, and ignore the patched-in retpoline. This allows objtool to understand the control flow *around* the retpoline, even if it can't yet follow what's inside. This means the ORC unwinder will fail to unwind from inside a retpoline, but will work fine otherwise. Signed-off-by: Josh Poimboeuf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk [dwmw2: Applies to tools/objtool/builtin-check.c not check.[ch]] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- tools/objtool/builtin-check.c | 64 +++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 293a23e27968..f789621cbdba 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -51,7 +51,7 @@ struct instruction { unsigned int len, state; unsigned char type; unsigned long immediate; - bool alt_group, visited; + bool alt_group, visited, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; @@ -352,6 +352,40 @@ static void add_ignores(struct objtool_file *file) } } +/* + * FIXME: For now, just ignore any alternatives which add retpolines. This is + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. + * But it at least allows objtool to understand the control flow *around* the + * retpoline. + */ +static int add_nospec_ignores(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + + sec = find_section_by_name(file->elf, ".rela.discard.nospec"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.nospec entry"); + return -1; + } + + insn->ignore_alts = true; + } + + return 0; +} + /* * Find the destination instructions for all jumps. */ @@ -435,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); + /* + * FIXME: Thanks to retpolines, it's now considered + * normal for a function to call within itself. So + * disable this warning for now. + */ +#if 0 if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at offset 0x%lx", insn->sec, insn->offset, dest_off); return -1; } +#endif } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -601,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file) return ret; list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { - alt = malloc(sizeof(*alt)); - if (!alt) { - WARN("malloc failed"); - ret = -1; - goto out; - } orig_insn = find_insn(file, special_alt->orig_sec, special_alt->orig_off); @@ -617,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + /* Ignore retpoline alternatives. */ + if (orig_insn->ignore_alts) + continue; + new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -642,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + alt->insn = new_insn; list_add_tail(&alt->list, &orig_insn->alts); @@ -861,6 +907,10 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); + ret = add_nospec_ignores(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; From 4bf050da57d970ea19bb8643e4c278ffb7262228 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 29 Sep 2017 17:15:36 +0300 Subject: [PATCH 084/705] x86/asm: Use register variable to get stack pointer value commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream. Currently we use current_stack_pointer() function to get the value of the stack pointer register. Since commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") ... we have a stack register variable declared. It can be used instead of current_stack_pointer() function which allows to optimize away some excessive "mov %rsp, %" instructions: -mov %rsp,%rdx -sub %rdx,%rax -cmp $0x3fff,%rax -ja ffffffff810722fd +sub %rsp,%rax +cmp $0x3fff,%rax +ja ffffffff810722fa Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer and use it instead of the removed function. Signed-off-by: Andrey Ryabinin Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar [dwmw2: We want ASM_CALL_CONSTRAINT for retpoline] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 11 +++++++++++ arch/x86/include/asm/thread_info.h | 11 ----------- arch/x86/kernel/irq_32.c | 6 +++--- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/tlb.c | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7acb51c49fec..00523524edbf 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -125,4 +125,15 @@ /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif +#ifndef __ASSEMBLY__ +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif + #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95..bdf9c4c91572 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -152,17 +152,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9de..d4eb450144fd 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack) static inline void *current_stack(void) { - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) @@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -139,7 +139,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bd4e3d4d3625..322f433fbc76 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * from double_fault. */ BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer()) >= THREAD_SIZE); + current_stack_pointer) >= THREAD_SIZE); preempt_enable_no_resched(); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 41205de487e7..578973ade71b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * mapped in the new pgd, we'll double-fault. Forcibly * map it. */ - unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + unsigned int stack_pgd_index = pgd_index(current_stack_pointer); pgd_t *pgd = next->pgd + stack_pgd_index; From 2bb5de42f254bf5addedf17c9c25c68d65639b55 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:25 +0000 Subject: [PATCH 085/705] x86/retpoline: Add initial retpoline support commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream. Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide the corresponding thunks. Provide assembler macros for invoking the thunks in the same way that GCC does, from native and inline assembler. This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In some circumstances, IBRS microcode features may be used instead, and the retpoline can be disabled. On AMD CPUs if lfence is serialising, the retpoline can be dramatically simplified to a simple "lfence; jmp *\reg". A future patch, after it has been verified that lfence really is serialising in all circumstances, can enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition to X86_FEATURE_RETPOLINE. Do not align the retpoline in the altinstr section, because there is no guarantee that it stays aligned when it's copied over the oldinstr during alternative patching. [ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] [ tglx: Put actual function CALL/JMP in front of the macros, convert to symbolic labels ] [ dwmw2: Convert back to numeric labels, merge objtool fixes ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 13 +++ arch/x86/Makefile | 10 ++ arch/x86/include/asm/asm-prototypes.h | 25 +++++ arch/x86/include/asm/cpufeatures.h | 3 + arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 4 + arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 48 ++++++++++ 8 files changed, 232 insertions(+) create mode 100644 arch/x86/include/asm/nospec-branch.h create mode 100644 arch/x86/lib/retpoline.S diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 447edeb0cfcb..0ca4d12ce95c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -408,6 +408,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + ---help--- + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. + if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a360..1e1a7334db0f 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -182,6 +182,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + else + $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) + endif +endif + archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 44b8762fa0c7..b15aa4083dfd 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -10,7 +10,32 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); #endif + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +INDIRECT_THUNK(sp) +#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f364c8919eb8..4467568a531b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,9 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 000000000000..e20e92ef2ca8 --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NOSPEC_BRANCH_H__ +#define __NOSPEC_BRANCH_H__ + +#include +#include +#include + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +.macro ANNOTATE_NOSPEC_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.nospec + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. + */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro. + */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.nospec\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b9ae04ddf5d..6e885cc68152 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); +#ifdef CONFIG_RETPOLINE + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); +#endif + fpu__init_system(c); #ifdef CONFIG_X86_32 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 34a74131a12c..6bf1898ddf49 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 000000000000..cb45c6cb465f --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk.\reg + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +GENERATE_THUNK(_ASM_SP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif From 8f96937ee30484aebf687f33e65e8be7ecace13a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:26 +0000 Subject: [PATCH 086/705] x86/spectre: Add boot time option to select Spectre v2 mitigation commit da285121560e769cc31797bba6422eea71d473e0 upstream. Add a spectre_v2= option to select the mitigation used for the indirect branch speculation vulnerability. Currently, the only option available is retpoline, in its various forms. This will be expanded to cover the new IBRS/IBPB microcode features. The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a serializing instruction, which is indicated by the LFENCE_RDTSC feature. [ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS integration becomes simple ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 28 +++++ arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 158 ++++++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 - 4 files changed, 195 insertions(+), 5 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2b1d782eda6f..4c2667aa4634 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2691,6 +2691,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 + (indirect branch prediction) vulnerability. System may + allow data leaks with this option, which is equivalent + to spectre_v2=off. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3944,6 +3949,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable + + Selecting 'on' will, and 'auto' may, choose a + mitigation method at run time according to the + CPU, the available microcode, the setting of the + CONFIG_RETPOLINE configuration option, and the + compiler with which the kernel was built. + + Specific mitigations can also be selected manually: + + retpoline - replace indirect branches + retpoline,generic - google's original retpoline + retpoline,amd - AMD-specific minimal thunk + + Not specifying this option is equivalent to + spectre_v2=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e20e92ef2ca8..ea034fa6e261 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -124,5 +124,15 @@ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE_MINIMAL, + SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_IBRS, +}; + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cb6b4f9d0b7a..49d25ddf0e9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,9 @@ #include #include #include + +#include +#include #include #include #include @@ -20,6 +23,8 @@ #include #include +static void __init spectre_v2_select_mitigation(void); + void __init check_bugs(void) { identify_boot_cpu(); @@ -29,6 +34,9 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -61,6 +69,153 @@ void __init check_bugs(void) #endif } +/* The kernel command line selection */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +}; + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static inline bool retp_compiler(void) +{ + return __is_defined(RETPOLINE); +} + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +{ + char arg[20]; + int ret; + + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret > 0) { + if (match_option(arg, ret, "off")) { + goto disable; + } else if (match_option(arg, ret, "on")) { + spec2_print_if_secure("force enabled on command line."); + return SPECTRE_V2_CMD_FORCE; + } else if (match_option(arg, ret, "retpoline")) { + spec2_print_if_insecure("retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE; + } else if (match_option(arg, ret, "retpoline,amd")) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + spec2_print_if_insecure("AMD retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_AMD; + } else if (match_option(arg, ret, "retpoline,generic")) { + spec2_print_if_insecure("generic retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_GENERIC; + } else if (match_option(arg, ret, "auto")) { + return SPECTRE_V2_CMD_AUTO; + } + } + + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_AUTO; +disable: + spec2_print_if_insecure("disabled on command line."); + return SPECTRE_V2_CMD_NONE; +} + +static void __init spectre_v2_select_mitigation(void) +{ + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; + + /* + * If the CPU is not affected and the command line mode is NONE or AUTO + * then nothing to do. + */ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) + return; + + switch (cmd) { + case SPECTRE_V2_CMD_NONE: + return; + + case SPECTRE_V2_CMD_FORCE: + /* FALLTRHU */ + case SPECTRE_V2_CMD_AUTO: + goto retpoline_auto; + + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; + break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_generic; + break; + case SPECTRE_V2_CMD_RETPOLINE: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + } + pr_err("kernel not compiled with retpoline; no mitigation available!"); + return; + +retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : + SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : + SPECTRE_V2_RETPOLINE_MINIMAL; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); +} + +#undef pr_fmt + #ifdef CONFIG_SYSFS ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -85,6 +240,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6e885cc68152..7b9ae04ddf5d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,10 +889,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -#ifdef CONFIG_RETPOLINE - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -#endif - fpu__init_system(c); #ifdef CONFIG_X86_32 From 2adc2f74449f7d65ee20675aa2987c3a7446bfab Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:27 +0000 Subject: [PATCH 087/705] x86/retpoline/crypto: Convert crypto assembler indirect jumps commit 9697fa39efd3fc3692f2949d4045f393ec58450b upstream. Convert all indirect jumps in crypto assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_asm.S | 5 +++-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060..fa8801b35e51 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * The following macros are used to move an (un)aligned 16 byte value to/from @@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f6..77ff4de2224d 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,6 +17,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way: vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656d..7384342fbb41 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -12,6 +12,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way: vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b..174fd4146043 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -45,6 +45,7 @@ #include #include +#include ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction @@ -172,7 +173,7 @@ continue_block: movzxw (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp - jmp *bufp + JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: From 8b1bacc3218c75707175782ed70c2aa916d366bb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:28 +0000 Subject: [PATCH 088/705] x86/retpoline/entry: Convert entry assembler indirect jumps commit 2641f08bb7fc63a636a2b18173221d7040a3512e upstream. Convert indirect jumps in core 32/64bit entry assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return address after the 'call' instruction must be *precisely* at the .Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work, and the use of alternatives will mess that up unless we play horrid games to prepend with NOPs and make the variants the same length. It's not worth it; in the case where we ALTERNATIVE out the retpoline, the first instruction at __x86.indirect_thunk.rax is going to be a bare jmp *%rax anyway. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/entry/entry_64.S | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index edba8606b99a..7b95f35ba818 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -45,6 +45,7 @@ #include #include #include +#include .section .entry.text, "ax" @@ -260,7 +261,7 @@ ENTRY(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - call *%ebx + CALL_NOSPEC %ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -1062,7 +1063,7 @@ error_code: movl %ecx, %es TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call *%edi + CALL_NOSPEC %edi jmp ret_from_exception END(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index af4e58132d91..b9c901ce6582 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -37,6 +37,7 @@ #include #include #include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath: * It might end up jumping to the slow path. If it jumps, RAX * and all argument registers are clobbered. */ +#ifdef CONFIG_RETPOLINE + movq sys_call_table(, %rax, 8), %rax + call __x86_indirect_thunk_rax +#else call *sys_call_table(, %rax, 8) +#endif .Lentry_SYSCALL_64_after_fastpath_call: movq %rax, RAX(%rsp) @@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64) jmp entry_SYSCALL64_slow_path 1: - jmp *%rax /* Called from C */ + JMP_NOSPEC %rax /* Called from C */ END(stub_ptregs_64) .macro ptregs_stub func @@ -457,7 +463,7 @@ ENTRY(ret_from_fork) 1: /* kernel thread */ movq %r12, %rdi - call *%rbx + CALL_NOSPEC %rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() From 83d7658362cc65e3d8c71b6002b107dc1f5bc55a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:29 +0000 Subject: [PATCH 089/705] x86/retpoline/ftrace: Convert ftrace assembler indirect jumps commit 9351803bd803cdbeb9b5a7850b7b6f464806e3db upstream. Convert all indirect jumps in ftrace assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/kernel/mcount_64.S | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 7b95f35ba818..bdc9aeaf2e45 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -985,7 +985,8 @@ trace: movl 0x4(%ebp), %edx subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + movl ftrace_trace_function, %ecx + CALL_NOSPEC %ecx popl %edx popl %ecx @@ -1021,7 +1022,7 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - jmp *%ecx + JMP_NOSPEC %ecx #endif #ifdef CONFIG_TRACING diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 7b0d3da52fb4..287ec3bc141f 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -8,7 +8,7 @@ #include #include #include - +#include .code64 .section .entry.text, "ax" @@ -290,8 +290,9 @@ trace: * ip and parent ip are used and the list function is called when * function tracing is enabled. */ - call *ftrace_trace_function + movq ftrace_trace_function, %r8 + CALL_NOSPEC %r8 restore_mcount_regs jmp fgraph_trace @@ -334,5 +335,5 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - jmp *%rdi + JMP_NOSPEC %rdi #endif From 9e37da4c3de1b6cca215a8515491de75360022ba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:30 +0000 Subject: [PATCH 090/705] x86/retpoline/hyperv: Convert assembler indirect jumps commit e70e5892b28c18f517f29ab6e83bd57705104b31 upstream. Convert all indirect jumps in hyperv inline asm code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk [ backport to 4.9, hopefully correct, not tested... - gregkh ] Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index e0a8216ecf2b..13c32eb40738 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" /* The one and only */ @@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) return (u64)ULLONG_MAX; __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); - __asm__ __volatile__("call *%3" : "=a" (hv_status) : + __asm__ __volatile__(CALL_NOSPEC : + "=a" (hv_status) : "c" (control), "d" (input_address), - "m" (hypercall_page)); + THUNK_TARGET(hypercall_page)); return hv_status; @@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hypercall_page) return (u64)ULLONG_MAX; - __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), + __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi), "=a"(hv_status_lo) : "d" (control_hi), "a" (control_lo), "b" (input_address_hi), "c" (input_address_lo), "D"(output_address_hi), - "S"(output_address_lo), "m" (hypercall_page)); + "S"(output_address_lo), + THUNK_TARGET(hypercall_page)); return hv_status_lo | ((u64)hv_status_hi << 32); #endif /* !x86_64 */ From 87a1fe36250d65e0bffb0199ebd144ea80f87cd7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:31 +0000 Subject: [PATCH 091/705] x86/retpoline/xen: Convert Xen hypercall indirect jumps commit ea08816d5b185ab3d09e95e393f265af54560350 upstream. Convert indirect call in Xen hypercall to use non-speculative sequence, when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Reviewed-by: Juergen Gross Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 8b678af866f7..ccdc23d89b60 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -216,9 +217,9 @@ privcmd_call(unsigned call, __HYPERCALL_5ARG(a1, a2, a3, a4, a5); stac(); - asm volatile("call *%[call]" + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM - : [call] "a" (&hypercall_page[call]) + : [thunk_target] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); clac(); From a590960ae6ea94a8b35b5abd52ba680dba62db66 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:32 +0000 Subject: [PATCH 092/705] x86/retpoline/checksum32: Convert assembler indirect jumps commit 5096732f6f695001fa2d6f1335a2680b37912c69 upstream. Convert all indirect jumps in 32bit checksum assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/checksum_32.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4d34bb548b41..46e71a74e612 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -29,7 +29,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -156,7 +157,7 @@ ENTRY(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) From 276e300447109c92a4f807d4ed7da01c5f590568 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 11 Jan 2018 21:46:33 +0000 Subject: [PATCH 093/705] x86/retpoline/irq32: Convert assembler indirect jumps commit 7614e913db1f40fff819b36216484dc3808995d4 upstream. Convert all indirect jumps in 32bit irq inline asm code to use non speculative sequences. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/irq_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d4eb450144fd..2763573ee1d2 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -54,11 +55,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=b" (stack) : "0" (stack), - "D"(func) + [thunk_target] "D"(func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -94,11 +95,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) call_on_stack(print_stack_overflow, isp); asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=a" (arg1), "=b" (isp) : "0" (desc), "1" (isp), - "D" (desc->handle_irq) + [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } From c1ddd99a029636e234a800f28790a60d6ac0318f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 11:11:27 +0000 Subject: [PATCH 094/705] x86/retpoline: Fill return stack buffer on vmexit commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream. In accordance with the Intel and AMD documentation, we need to overwrite all entries in the RSB on exiting a guest, to prevent malicious branch target predictions from affecting the host kernel. This is needed both for retpoline and for IBRS. [ak: numbers again for the RSB stuffing labels] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 78 +++++++++++++++++++++++++++- arch/x86/kvm/svm.c | 4 ++ arch/x86/kvm/vmx.c | 4 ++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ea034fa6e261..402a11c803c3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,6 +7,48 @@ #include #include +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -74,6 +116,20 @@ #else call *\reg #endif +.endm + + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: +#endif .endm #else /* __ASSEMBLY__ */ @@ -119,7 +175,7 @@ X86_FEATURE_RETPOLINE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) -#else /* No retpoline */ +#else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -134,5 +190,25 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future + * CPUs with IBRS_ATT *might* it be avoided. + */ +static inline void vmexit_fill_RSB(void) +{ +#ifdef CONFIG_RETPOLINE + unsigned long loops = RSB_CLEAR_LOOPS / 2; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=&r" (loops), ASM_CALL_CONSTRAINT + : "r" (loops) : "memory" ); +#endif +} #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8d96f9ce1926..24af898fb3a6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "trace.h" @@ -4917,6 +4918,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab6605425497..3ca6d15994e4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -9026,6 +9027,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); From c05d544d53437de40cdc2d1fa5297dfda558bcdb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Jan 2018 17:16:51 -0800 Subject: [PATCH 095/705] selftests/x86: Add test_vsyscall commit 352909b49ba0d74929b96af6dfbefc854ab6ebb5 upstream. This tests that the vsyscall entries do what they're expected to do. It also confirms that attempts to read the vsyscall page behave as expected. If changes are made to the vsyscall code or its memory map handling, running this test in all three of vsyscall=none, vsyscall=emulate, and vsyscall=native are helpful. (Because it's easy, this also compares the vsyscall results to their vDSO equivalents.) Note to KAISER backporters: please test this under all three vsyscall modes. Also, in the emulate and native modes, make sure that test_vsyscall_64 agrees with the command line or config option as to which mode you're in. It's quite easy to mess up the kernel such that native mode accidentally emulates or vice versa. Greg, etc: please backport this to all your Meltdown-patched kernels. It'll help make sure the patches didn't regress vsyscalls. CSigned-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/2b9c5a174c1d60fd7774461d518aa75598b1d8fd.1515719552.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/test_vsyscall.c | 500 ++++++++++++++++++++ 2 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/test_vsyscall.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 6300c1a41ff6..4af37bfe4aea 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,7 +6,7 @@ include ../lib.mk TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ - protection_keys + protection_keys test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c new file mode 100644 index 000000000000..6e0bd52ad53d --- /dev/null +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +# define VSYS(x) (x) +#else +# define VSYS(x) 0 +#endif + +#ifndef SYS_getcpu +# ifdef __x86_64__ +# define SYS_getcpu 309 +# else +# define SYS_getcpu 318 +# endif +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +/* vsyscalls and vDSO */ +bool should_read_vsyscall = false; + +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); +gtod_t vdso_gtod; + +typedef int (*vgettime_t)(clockid_t, struct timespec *); +vgettime_t vdso_gettime; + +typedef long (*time_func_t)(time_t *t); +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); +time_func_t vdso_time; + +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); +getcpu_t vdso_getcpu; + +static void init_vdso(void) +{ + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + printf("[WARN]\tfailed to find vDSO\n"); + return; + } + + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gtod) + printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); + + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_gettime) + printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); + + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); + if (!vdso_time) + printf("[WARN]\tfailed to find time in vDSO\n"); + + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); + if (!vdso_getcpu) { + /* getcpu() was never wired up in the 32-bit vDSO. */ + printf("[%s]\tfailed to find getcpu in vDSO\n", + sizeof(long) == 8 ? "WARN" : "NOTE"); + } +} + +static int init_vsys(void) +{ +#ifdef __x86_64__ + int nerrs = 0; + FILE *maps; + char line[128]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); + should_read_vsyscall = true; + return 0; + } + + while (fgets(line, sizeof(line), maps)) { + char r, x; + void *start, *end; + char name[128]; + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + printf("\tvsyscall map: %s", line); + + if (start != (void *)0xffffffffff600000 || + end != (void *)0xffffffffff601000) { + printf("[FAIL]\taddress range is nonsense\n"); + nerrs++; + } + + printf("\tvsyscall permissions are %c-%c\n", r, x); + should_read_vsyscall = (r == 'r'); + if (x != 'x') { + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("\tno vsyscall map in /proc/self/maps\n"); + should_read_vsyscall = false; + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + return nerrs; +#else + return 0; +#endif +} + +/* syscalls */ +static inline long sys_gtod(struct timeval *tv, struct timezone *tz) +{ + return syscall(SYS_gettimeofday, tv, tz); +} + +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(SYS_clock_gettime, id, ts); +} + +static inline long sys_time(time_t *t) +{ + return syscall(SYS_time, t); +} + +static inline long sys_getcpu(unsigned * cpu, unsigned * node, + void* cache) +{ + return syscall(SYS_getcpu, cpu, node, cache); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static double tv_diff(const struct timeval *a, const struct timeval *b) +{ + return (double)(a->tv_sec - b->tv_sec) + + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; +} + +static int check_gtod(const struct timeval *tv_sys1, + const struct timeval *tv_sys2, + const struct timezone *tz_sys, + const char *which, + const struct timeval *tv_other, + const struct timezone *tz_other) +{ + int nerrs = 0; + double d1, d2; + + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { + printf("[FAIL] %s tz mismatch\n", which); + nerrs++; + } + + d1 = tv_diff(tv_other, tv_sys1); + d2 = tv_diff(tv_sys2, tv_other); + printf("\t%s time offsets: %lf %lf\n", which, d1, d2); + + if (d1 < 0 || d2 < 0) { + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); + nerrs++; + } else { + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); + } + + return nerrs; +} + +static int test_gtod(void) +{ + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; + struct timezone tz_sys, tz_vdso, tz_vsys; + long ret_vdso = -1; + long ret_vsys = -1; + int nerrs = 0; + + printf("[RUN]\ttest gettimeofday()\n"); + + if (sys_gtod(&tv_sys1, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + if (vdso_gtod) + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); + if (vgtod) + ret_vsys = vgtod(&tv_vsys, &tz_vsys); + if (sys_gtod(&tv_sys2, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + + if (vdso_gtod) { + if (ret_vdso == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); + } else { + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); + nerrs++; + } + } + + if (vgtod) { + if (ret_vsys == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); + } else { + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); + nerrs++; + } + } + + return nerrs; +} + +static int test_time(void) { + int nerrs = 0; + + printf("[RUN]\ttest time()\n"); + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; + t_sys1 = sys_time(&t2_sys1); + if (vdso_time) + t_vdso = vdso_time(&t2_vdso); + if (vtime) + t_vsys = vtime(&t2_vsys); + t_sys2 = sys_time(&t2_sys2); + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); + nerrs++; + return nerrs; + } + + if (vdso_time) { + if (t_vdso < 0 || t_vdso != t2_vdso) { + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); + nerrs++; + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); + nerrs++; + } else { + printf("[OK]\tvDSO time() is okay\n"); + } + } + + if (vtime) { + if (t_vsys < 0 || t_vsys != t2_vsys) { + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); + nerrs++; + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); + nerrs++; + } else { + printf("[OK]\tvsyscall time() is okay\n"); + } + } + + return nerrs; +} + +static int test_getcpu(int cpu) +{ + int nerrs = 0; + long ret_sys, ret_vdso = -1, ret_vsys = -1; + + printf("[RUN]\tgetcpu() on CPU %d\n", cpu); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tfailed to force CPU %d\n", cpu); + return nerrs; + } + + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; + unsigned node = 0; + bool have_node = false; + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); + if (vdso_getcpu) + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + if (vgetcpu) + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); + + if (ret_sys == 0) { + if (cpu_sys != cpu) { + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); + nerrs++; + } + + have_node = true; + node = node_sys; + } + + if (vdso_getcpu) { + if (ret_vdso) { + printf("[FAIL]\tvDSO getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vdso; + } + + if (cpu_vdso != cpu) { + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct CPU\n"); + } + + if (node_vdso != node) { + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct node\n"); + } + } + } + + if (vgetcpu) { + if (ret_vsys) { + printf("[FAIL]\tvsyscall getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vsys; + } + + if (cpu_vsys != cpu) { + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct CPU\n"); + } + + if (node_vsys != node) { + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct node\n"); + } + } + } + + return nerrs; +} + +static int test_vsys_r(void) +{ +#ifdef __x86_64__ + printf("[RUN]\tChecking read access to the vsyscall page\n"); + bool can_read; + if (sigsetjmp(jmpbuf, 1) == 0) { + *(volatile int *)0xffffffffff600000; + can_read = true; + } else { + can_read = false; + } + + if (can_read && !should_read_vsyscall) { + printf("[FAIL]\tWe have read access, but we shouldn't\n"); + return 1; + } else if (!can_read && should_read_vsyscall) { + printf("[FAIL]\tWe don't have read access, but we should\n"); + return 1; + } else { + printf("[OK]\tgot expected result\n"); + } +#endif + + return 0; +} + + +#ifdef __x86_64__ +#define X86_EFLAGS_TF (1UL << 8) +static volatile sig_atomic_t num_vsyscall_traps; + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; + + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) + num_vsyscall_traps++; +} + +static int test_native_vsyscall(void) +{ + time_t tmp; + bool is_native; + + if (!vtime) + return 0; + + printf("[RUN]\tchecking for native vsyscall\n"); + sethandler(SIGTRAP, sigtrap, 0); + set_eflags(get_eflags() | X86_EFLAGS_TF); + vtime(&tmp); + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + + /* + * If vsyscalls are emulated, we expect a single trap in the + * vsyscall page -- the call instruction will trap with RIP + * pointing to the entry point before emulation takes over. + * In native mode, we expect two traps, since whatever code + * the vsyscall page contains will be more than just a ret + * instruction. + */ + is_native = (num_vsyscall_traps > 1); + + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "native" : "emulated"), + (int)num_vsyscall_traps); + + return 0; +} +#endif + +int main(int argc, char **argv) +{ + int nerrs = 0; + + init_vdso(); + nerrs += init_vsys(); + + nerrs += test_gtod(); + nerrs += test_time(); + nerrs += test_getcpu(0); + nerrs += test_getcpu(1); + + sethandler(SIGSEGV, sigsegv, 0); + nerrs += test_vsys_r(); + +#ifdef __x86_64__ + nerrs += test_native_vsyscall(); +#endif + + return nerrs ? 1 : 0; +} From 44f1eae7fe6597381ce550027fdfa2e578a96ad8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 22:13:29 +0100 Subject: [PATCH 096/705] x86/retpoline: Remove compile time warning commit b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 upstream. Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler does not have retpoline support. Linus rationale for this is: It's wrong because it will just make people turn off RETPOLINE, and the asm updates - and return stack clearing - that are independent of the compiler are likely the most important parts because they are likely the ones easiest to target. And it's annoying because most people won't be able to do anything about it. The number of people building their own compiler? Very small. So if their distro hasn't got a compiler yet (and pretty much nobody does), the warning is just annoying crap. It is already properly reported as part of the sysfs interface. The compile-time warning only encourages bad things. Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Requested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uAtw@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1e1a7334db0f..cd22cb8ebd42 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -187,8 +187,6 @@ ifdef CONFIG_RETPOLINE RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ifneq ($(RETPOLINE_CFLAGS),) KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - else - $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) endif endif From 92e8f204947484f642b77836c0ce9f710d83f836 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 15 Jan 2018 11:00:54 -0600 Subject: [PATCH 097/705] objtool: Fix retpoline support for pre-ORC objtool Objtool 1.0 (pre-ORC) produces the following warning when it encounters a retpoline: arch/x86/crypto/camellia-aesni-avx2-asm_64.o: warning: objtool: .altinstr_replacement+0xf: return instruction outside of a callable function That warning is meant to catch GCC bugs and missing ENTRY/ENDPROC annotations, neither of which are applicable to alternatives. Silence the warning for alternative instructions, just like objtool 2.0 already does. Reported-by: David Woodhouse Signed-off-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- tools/objtool/builtin-check.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index f789621cbdba..a688a857a7ae 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -1230,6 +1230,14 @@ static int validate_uncallable_instructions(struct objtool_file *file) for_each_insn(file, insn) { if (!insn->visited && insn->type == INSN_RETURN) { + + /* + * Don't warn about call instructions in unvisited + * retpoline alternatives. + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + WARN_FUNC("return instruction outside of a callable function", insn->sec, insn->offset); warnings++; From 1b92c48a2eeb8e4c77a27c93052ba95dc6e1cdda Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 15 Jan 2018 11:44:14 -0500 Subject: [PATCH 098/705] x86/pti/efi: broken conversion from efi to kernel page table The page table order must be increased for EFI table in order to avoid a bug where NMI tries to change the page table to kernel page table, while efi page table is active. For more disccussion about this bug, see this thread: http://lkml.iu.edu/hypermail/linux/kernel/1801.1/00951.html Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgalloc.h | 11 +++++++++++ arch/x86/mm/pgtable.c | 7 ------- arch/x86/platform/efi/efi_64.c | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index b6d425999f99..1178a51b77f3 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -27,6 +27,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + /* * Allocate and free page tables. */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5aaec8effc5f..209b9465e97a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd) } #else -/* - * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ -#define PGD_ALLOCATION_ORDER kaiser_enabled - static inline pgd_t *_pgd_alloc(void) { return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2f25a363068c..dcb2d9d185a2 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; From b8cf9ff79d63a3ac5567bd7cbb9445bfed193f4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Jan 2018 09:39:00 +0100 Subject: [PATCH 099/705] Linux 4.9.77 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2637f0ed0a07..aba553531d6a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 76 +SUBLEVEL = 77 EXTRAVERSION = NAME = Roaring Lionus From c9ca9d9d9b7968d5490415c87a17db4335f846c3 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 18 Dec 2017 09:28:39 -0700 Subject: [PATCH 100/705] libnvdimm, btt: Fix an incompatibility in the log layout commit 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 upstream. Due to a spec misinterpretation, the Linux implementation of the BTT log area had different padding scheme from other implementations, such as UEFI and NVML. This fixes the padding scheme, and defaults to it for new BTT layouts. We attempt to detect the padding scheme in use when probing for an existing BTT. If we detect the older/incompatible scheme, we continue using it. Reported-by: Juston Li Cc: Dan Williams Cc: Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") Signed-off-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/btt.c | 203 +++++++++++++++++++++++++++++++++++-------- drivers/nvdimm/btt.h | 45 +++++++++- 2 files changed, 212 insertions(+), 36 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 94733f73d37f..7121453ec047 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -183,13 +183,13 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { - WARN_ON(!ent); + WARN_ON(!log); return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE); } static struct dentry *debugfs_root; @@ -229,6 +229,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -247,6 +249,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -256,8 +263,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. */ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -265,23 +274,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -306,17 +315,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_info(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -324,7 +334,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -338,17 +348,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. - * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half); if (ret) @@ -419,16 +425,16 @@ static int btt_log_init(struct arena_info *arena) { int ret; u32 i; - struct log_entry log, zerolog; + struct log_entry ent, zerolog; memset(&zerolog, 0, sizeof(zerolog)); for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent); if (ret) return ret; ret = __btt_log_write(arena, i, 1, &zerolog); @@ -490,6 +496,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. + */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -545,8 +668,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -563,6 +685,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -653,6 +779,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + ret = btt_freelist_init(arena); if (ret) goto out; diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index b2f8651e5395..0f80b6b3d4a3 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -26,6 +26,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -44,12 +45,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. + * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). + * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -117,6 +158,7 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -147,6 +189,7 @@ struct arena_info { struct dentry *debugfs_dir; /* Arena flags */ u32 flags; + int log_index[2]; }; /** From bb7119eea22c1764d3aa5edf541872cf5365f172 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Apr 2017 09:34:12 +0200 Subject: [PATCH 101/705] scsi: sg: disable SET_FORCE_LOW_DMA commit 745dfa0d8ec26b24f3304459ff6e9eacc5c8351b upstream. The ioctl SET_FORCE_LOW_DMA has never worked since the initial git check-in, and the respective setting is nowadays handled correctly. So disable it entirely. Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 30 +++++++++--------------------- include/scsi/sg.h | 1 - 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 184c7db1e0ca..cd9537ddc19f 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -149,7 +149,6 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ - char low_dma; /* as in parent but possibly overridden to 1 */ char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */ char cmd_q; /* 1 -> allow command queuing, 0 -> don't */ unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */ @@ -922,24 +921,14 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) /* strange ..., for backward compatibility */ return sfp->timeout_user; case SG_SET_FORCE_LOW_DMA: - result = get_user(val, ip); - if (result) - return result; - if (val) { - sfp->low_dma = 1; - if ((0 == sfp->low_dma) && !sfp->res_in_use) { - val = (int) sfp->reserve.bufflen; - sg_remove_scat(sfp, &sfp->reserve); - sg_build_reserve(sfp, val); - } - } else { - if (atomic_read(&sdp->detaching)) - return -ENODEV; - sfp->low_dma = sdp->device->host->unchecked_isa_dma; - } + /* + * N.B. This ioctl never worked properly, but failed to + * return an error value. So returning '0' to keep compability + * with legacy applications. + */ return 0; case SG_GET_LOW_DMA: - return put_user((int) sfp->low_dma, ip); + return put_user((int) sdp->device->host->unchecked_isa_dma, ip); case SG_GET_SCSI_ID: if (!access_ok(VERIFY_WRITE, p, sizeof (sg_scsi_id_t))) return -EFAULT; @@ -1860,6 +1849,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) int sg_tablesize = sfp->parentdp->sg_tablesize; int blk_size = buff_size, order; gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; + struct sg_device *sdp = sfp->parentdp; if (blk_size < 0) return -EFAULT; @@ -1885,7 +1875,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = num; } - if (sfp->low_dma) + if (sdp->device->host->unchecked_isa_dma) gfp_mask |= GFP_DMA; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) @@ -2148,8 +2138,6 @@ sg_add_sfp(Sg_device * sdp) sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; - sfp->low_dma = (SG_DEF_FORCE_LOW_DMA == 0) ? - sdp->device->host->unchecked_isa_dma : 1; sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; @@ -2608,7 +2596,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) jiffies_to_msecs(fp->timeout), fp->reserve.bufflen, (int) fp->reserve.k_use_sg, - (int) fp->low_dma); + (int) sdp->device->host->unchecked_isa_dma); seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan); diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 3afec7032448..20bc71c3e0b8 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -197,7 +197,6 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ #define SG_DEFAULT_RETRIES 0 /* Defaults, commented if they differ from original sg driver */ -#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */ #define SG_DEF_FORCE_PACK_ID 0 #define SG_DEF_KEEP_ORPHAN 0 #define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */ From d8a3170db0deca6bfee32ad77f492caba9f6791d Mon Sep 17 00:00:00 2001 From: Li Jinyue Date: Thu, 14 Dec 2017 17:04:54 +0800 Subject: [PATCH 102/705] futex: Prevent overflow by strengthen input validation commit fbe0e839d1e22d88810f3ee3e2f1479be4c0aa4a upstream. UBSAN reports signed integer overflow in kernel/futex.c: UBSAN: Undefined behaviour in kernel/futex.c:2041:18 signed integer overflow: 0 - -2147483648 cannot be represented in type 'int' Add a sanity check to catch negative values of nr_wake and nr_requeue. Signed-off-by: Li Jinyue Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: dvhart@infradead.org Link: https://lkml.kernel.org/r/1513242294-31786-1-git-send-email-lijinyue@huawei.com Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 88bad86180ac..bb2265ae5cbc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1711,6 +1711,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_q *this, *next; WAKE_Q(wake_q); + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + if (requeue_pi) { /* * Requeue PI only works on two distinct uaddrs. This From e4ff9f294629b65df3f7982cd4faf86b2856b9d7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Jan 2018 23:11:03 +0100 Subject: [PATCH 103/705] ALSA: seq: Make ioctls race-free commit b3defb791b26ea0683a93a4f49c77ec45ec96f10 upstream. The ALSA sequencer ioctls have no protection against racy calls while the concurrent operations may lead to interfere with each other. As reported recently, for example, the concurrent calls of setting client pool with a combination of write calls may lead to either the unkillable dead-lock or UAF. As a slightly big hammer solution, this patch introduces the mutex to make each ioctl exclusive. Although this may reduce performance via parallel ioctl calls, usually it's not demanded for sequencer usages, hence it should be negligible. Reported-by: Luo Quan Reviewed-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 3 +++ sound/core/seq/seq_clientmgr.h | 1 + 2 files changed, 4 insertions(+) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 45ef5915462c..16580a82e1c8 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -221,6 +221,7 @@ static struct snd_seq_client *seq_create_client1(int client_index, int poolsize) rwlock_init(&client->ports_lock); mutex_init(&client->ports_mutex); INIT_LIST_HEAD(&client->ports_list_head); + mutex_init(&client->ioctl_mutex); /* find free slot in the client table */ spin_lock_irqsave(&clients_lock, flags); @@ -2127,7 +2128,9 @@ static long snd_seq_ioctl(struct file *file, unsigned int cmd, return -EFAULT; } + mutex_lock(&client->ioctl_mutex); err = handler->func(client, &buf); + mutex_unlock(&client->ioctl_mutex); if (err >= 0) { /* Some commands includes a bug in 'dir' field. */ if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT || diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index c6614254ef8a..0611e1e0ed5b 100644 --- a/sound/core/seq/seq_clientmgr.h +++ b/sound/core/seq/seq_clientmgr.h @@ -61,6 +61,7 @@ struct snd_seq_client { struct list_head ports_list_head; rwlock_t ports_lock; struct mutex ports_mutex; + struct mutex ioctl_mutex; int convert32; /* convert 32->64bit */ /* output pool */ From b9e168a0c629761774df687e88fca7e0e47381bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 23:48:05 +0100 Subject: [PATCH 104/705] ALSA: pcm: Remove yet superfluous WARN_ON() commit 23b19b7b50fe1867da8d431eea9cd3e4b6328c2c upstream. muldiv32() contains a snd_BUG_ON() (which is morphed as WARN_ON() with debug option) for checking the case of 0 / 0. This would be helpful if this happens only as a logical error; however, since the hw refine is performed with any data set provided by user, the inconsistent values that can trigger such a condition might be passed easily. Actually, syzbot caught this by passing some zero'ed old hw_params ioctl. So, having snd_BUG_ON() there is simply superfluous and rather harmful to give unnecessary confusions. Let's get rid of it. Reported-by: syzbot+7e6ee55011deeebce15d@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_lib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index e685e779a4b8..9306604f5070 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -578,7 +578,6 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b, { u_int64_t n = (u_int64_t) a * b; if (c == 0) { - snd_BUG_ON(!n); *r = 0; return UINT_MAX; } From fae704d5bd29771974e8cfe5f84480d9871b18cb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 08:34:28 +0100 Subject: [PATCH 105/705] ALSA: hda - Apply headphone noise quirk for another Dell XPS 13 variant commit e4c9fd10eb21376f44723c40ad12395089251c28 upstream. There is another Dell XPS 13 variant (SSID 1028:082a) that requires the existing fixup for reducing the headphone noise. This patch adds the quirk entry for that. BugLink: http://lkml.kernel.org/r/CAHXyb9ZCZJzVisuBARa+UORcjRERV8yokez=DP1_5O5isTz0ZA@mail.gmail.com Reported-and-tested-by: Francisco G. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4ef3b0067876..71a058fcf884 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5617,6 +5617,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), + SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 4b6e681f595224618feae64cb8299224e218f1eb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 10:53:18 +0100 Subject: [PATCH 106/705] ALSA: hda - Apply the existing quirk to iMac 14,1 commit 031f335cda879450095873003abb03ae8ed3b74a upstream. iMac 14,1 requires the same quirk as iMac 12,2, using GPIO 2 and 3 for headphone and speaker output amps. Add the codec SSID quirk entry (106b:0600) accordingly. BugLink: http://lkml.kernel.org/r/CAEw6Zyteav09VGHRfD5QwsfuWv5a43r0tFBNbfcHXoNrxVz7ew@mail.gmail.com Reported-by: Freaky Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cirrus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 80bbadc83721..d6e079f4ec09 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -408,6 +408,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { /*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/ /* codec SSID */ + SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), From 676109b28cadbb9c6e6fedee733f3b2111923f17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 23:19:49 +0100 Subject: [PATCH 107/705] timers: Unconditionally check deferrable base commit ed4bbf7910b28ce3c691aef28d245585eaabda06 upstream. When the timer base is checked for expired timers then the deferrable base must be checked as well. This was missed when making the deferrable base independent of base::nohz_active. Fixes: ced6d5c11d3e ("timers: Use deferrable base independent of base::nohz_active") Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Paul McKenney Cc: rt@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e872f7f05e8a..2d5cc7dfee14 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1696,7 +1696,7 @@ void run_local_timers(void) hrtimer_run_queues(); /* Raise the softirq only if required. */ if (time_before(jiffies, base->clk)) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) return; /* CPU is awake, so check the deferrable base. */ base++; From e4dc05ab8f5af1e26064f674fb475f44e687b397 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 18:13:05 -0600 Subject: [PATCH 108/705] af_key: fix buffer overread in verify_address_len() commit 06b335cb51af018d5feeff5dd4fd53847ddb675a upstream. If a message sent to a PF_KEY socket ended with one of the extensions that takes a 'struct sadb_address' but there were not enough bytes remaining in the message for the ->sa_family member of the 'struct sockaddr' which is supposed to follow, then verify_address_len() read past the end of the message, into uninitialized memory. Fix it by returning -EINVAL in this case. This bug was found using syzkaller with KMSAN. Reproducer: #include #include #include int main() { int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); char buf[24] = { 0 }; struct sadb_msg *msg = (void *)buf; struct sadb_address *addr = (void *)(msg + 1); msg->sadb_msg_version = PF_KEY_V2; msg->sadb_msg_type = SADB_DELETE; msg->sadb_msg_len = 3; addr->sadb_address_len = 1; addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; write(sock, buf, 24); } Reported-by: Alexander Potapenko Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 94bf810ad242..ea81a58a4ff6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -401,6 +401,11 @@ static int verify_address_len(const void *p) #endif int len; + if (sp->sadb_address_len < + DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family), + sizeof(uint64_t))) + return -EINVAL; + switch (addr->sa_family) { case AF_INET: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); From 0476e6d0b7521d78cbdc9e99e6cbbf616d849042 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 18:15:23 -0600 Subject: [PATCH 109/705] af_key: fix buffer overread in parse_exthdrs() commit 4e765b4972af7b07adcb1feb16e7a525ce1f6b28 upstream. If a message sent to a PF_KEY socket ended with an incomplete extension header (fewer than 4 bytes remaining), then parse_exthdrs() read past the end of the message, into uninitialized memory. Fix it by returning -EINVAL in this case. Reproducer: #include #include #include int main() { int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); char buf[17] = { 0 }; struct sadb_msg *msg = (void *)buf; msg->sadb_msg_version = PF_KEY_V2; msg->sadb_msg_type = SADB_DELETE; msg->sadb_msg_len = 2; write(sock, buf, 17); } Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index ea81a58a4ff6..6482b001f19a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -516,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void * uint16_t ext_type; int ext_len; + if (len < sizeof(*ehdr)) + return -EINVAL; + ext_len = ehdr->sadb_ext_len; ext_len *= sizeof(uint64_t); ext_type = ehdr->sadb_ext_type; From d303d0ca9afb8e5f644e1392c684d44308bab96e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 26 Nov 2017 15:31:04 +0200 Subject: [PATCH 110/705] iser-target: Fix possible use-after-free in connection establishment error commit cd52cb26e7ead5093635e98e07e221e4df482d34 upstream. In case we fail to establish the connection we must drain our pre-posted login recieve work request before continuing safely with connection teardown. Fixes: a060b5629ab0 ("IB/core: generic RDMA READ/WRITE API") Reported-by: Amrani, Ram Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 39d28375aa37..0983470929bd 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -747,6 +747,7 @@ isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + ib_drain_qp(isert_conn->qp); list_del_init(&isert_conn->node); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); From 997231f9fd7a4a8ad7c862736ab571e99d3800cb Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Mon, 20 Mar 2017 16:42:48 +0100 Subject: [PATCH 111/705] scsi: hpsa: fix volume offline state commit eb94588dabec82e012281608949a860f64752914 upstream. In a previous patch a hpsa_scsi_dev_t.volume_offline update line has been removed, so let us put it back.. Fixes: 85b29008d8 (hpsa: update check for logical volume status) Signed-off-by: Tomas Henzl Acked-by: Don Brace Signed-off-by: Martin K. Petersen Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 99623701fc3d..0b8db8a74d50 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3857,6 +3857,7 @@ static int hpsa_update_device_info(struct ctlr_info *h, if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); + this_device->volume_offline = volume_offline; if (volume_offline == HPSA_LV_FAILED) { rc = HPSA_LV_FAILED; dev_err(&h->pdev->dev, From 1ad4f2872c3b93216313a18bbf6e9f6d90e573d3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 10 May 2017 21:03:37 +0800 Subject: [PATCH 112/705] sched/deadline: Zero out positive runtime after throttling constrained tasks commit ae83b56a56f8d9643dedbee86b457fa1c5d42f59 upstream. When a contrained task is throttled by dl_check_constrained_dl(), it may carry the remaining positive runtime, as a result when dl_task_timer() fires and calls replenish_dl_entity(), it will not be replenished correctly due to the positive dl_se->runtime. This patch assigns its runtime to 0 if positive after throttling. Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: df8eac8cafce ("sched/deadline: Throttle a constrained deadline task activated after the deadline) Link: http://lkml.kernel.org/r/1494421417-27550-1-git-send-email-xlpang@redhat.com Signed-off-by: Ingo Molnar Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index df5c32a0c6ed..3042881169b4 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -723,6 +723,8 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) return; dl_se->dl_throttled = 1; + if (dl_se->runtime > 0) + dl_se->runtime = 0; } } From abf67b1e788194a2d13a8e77f05a44cbf9eae655 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 17:49:25 +0000 Subject: [PATCH 113/705] x86/retpoline: Fill RSB on context switch for affected CPUs commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 11 +++++++++ arch/x86/entry/entry_64.S | 11 +++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bdc9aeaf2e45..a76dc738ec61 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -229,6 +229,17 @@ ENTRY(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl %esi popl %edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b9c901ce6582..3a15023bdf1a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -427,6 +427,17 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq %r15 popq %r14 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4467568a531b..2f60cb5e9360 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -200,6 +200,7 @@ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 49d25ddf0e9f..8cacf62ec458 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,6 +22,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); @@ -154,6 +155,23 @@ disable: return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -212,6 +230,24 @@ retpoline_auto: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); + + /* + * If neither SMEP or KPTI are available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this fill + * the entire RSB, even when using IBRS. + * + * Skylake era CPUs have a separate issue with *underflow* of the + * RSB, when they will predict 'ret' targets from the generic BTB. + * The proper mitigation for this is IBRS. If IBRS is not supported + * or deactivated in favour of retpolines the RSB fill on context + * switch is required. + */ + if ((!boot_cpu_has(X86_FEATURE_KAISER) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } } #undef pr_fmt From b73d68788f79a4906a5ef977fca58297a3d22482 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 13 Jan 2018 17:27:30 -0600 Subject: [PATCH 114/705] x86/retpoline: Add LFENCE to the retpoline/RSB filling RSB macros commit 28d437d550e1e39f805d99f9f8ac399c778827b7 upstream. The PAUSE instruction is currently used in the retpoline and RSB filling macros as a speculation trap. The use of PAUSE was originally suggested because it showed a very, very small difference in the amount of cycles/time used to execute the retpoline as compared to LFENCE. On AMD, the PAUSE instruction is not a serializing instruction, so the pause/jmp loop will use excess power as it is speculated over waiting for return to mispredict to the correct target. The RSB filling macro is applicable to AMD, and, if software is unable to verify that LFENCE is serializing on AMD (possible when running under a hypervisor), the generic retpoline support will be used and, so, is also applicable to AMD. Keep the current usage of PAUSE for Intel, but add an LFENCE instruction to the speculation trap for AMD. The same sequence has been adopted by GCC for the GCC generated retpolines. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Acked-by: David Woodhouse Acked-by: Arjan van de Ven Cc: Rik van Riel Cc: Andi Kleen Cc: Paul Turner Cc: Peter Zijlstra Cc: Tim Chen Cc: Jiri Kosina Cc: Dave Hansen Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Kees Cook Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 402a11c803c3..7b45d8424150 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,7 +11,7 @@ * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; jmp' loop to capture speculative execution. + * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to @@ -38,11 +38,13 @@ call 772f; \ 773: /* speculation trap */ \ pause; \ + lfence; \ jmp 773b; \ 772: \ call 774f; \ 775: /* speculation trap */ \ pause; \ + lfence; \ jmp 775b; \ 774: \ dec reg; \ @@ -73,6 +75,7 @@ call .Ldo_rop_\@ .Lspec_trap_\@: pause + lfence jmp .Lspec_trap_\@ .Ldo_rop_\@: mov \reg, (%_ASM_SP) @@ -165,6 +168,7 @@ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ + " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: addl $4, %%esp;\n" \ From 13ccac5de85305cd1388ffa8b1e20b010373d76f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 15 Jan 2018 08:17:08 -0600 Subject: [PATCH 115/705] objtool: Improve error message for bad file argument commit 385d11b152c4eb638eeb769edcb3249533bb9a00 upstream. If a nonexistent file is supplied to objtool, it complains with a non-helpful error: open: No such file or directory Improve it to: objtool: Can't open 'foo': No such file or directory Reported-by: Markus Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/406a3d00a21225eee2819844048e17f68523ccf6.1516025651.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/elf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d897702ce742..faacf0c89976 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "elf.h" #include "warn.h" @@ -370,7 +371,8 @@ struct elf *elf_open(const char *name) elf->fd = open(name, O_RDONLY); if (elf->fd == -1) { - perror("open"); + fprintf(stderr, "objtool: Can't open '%s': %s\n", + name, strerror(errno)); goto err; } From a96cf98dda3f06a5e5eb3d393dba35a9424a7e13 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Jan 2018 16:42:25 +0100 Subject: [PATCH 116/705] x86/cpufeature: Move processor tracing out of scattered features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4fdec2034b7540dda461c6ba33325dfcff345c64 upstream. Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX), so do not duplicate it in the scattered features word. Besides being more tidy, this will be useful for KVM when it presents processor tracing to the guests. KVM selects host features that are supported by both the host kernel (depending on command line options, CPU errata, or whatever) and KVM. Whenever a full feature word exists, KVM's code is written in the expectation that the CPUID bit number matches the X86_FEATURE_* bit number, but this is not the case for X86_FEATURE_INTEL_PT. Signed-off-by: Paolo Bonzini Cc: Borislav Petkov Cc: Linus Torvalds Cc: Luwei Kang Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/scattered.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2f60cb5e9360..8537a21acd8b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,7 +197,6 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ @@ -236,6 +235,7 @@ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 1db8dc490b66..b0dd9aec183d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, From eee0cba7b02f21110a443867401b41dc25a60777 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 16 Jan 2018 12:52:28 -0800 Subject: [PATCH 117/705] module: Add retpoline tag to VERMAGIC commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12 upstream. Add a marker for retpoline to the module VERMAGIC. This catches the case when a non RETPOLINE compiled module gets loaded into a retpoline kernel, making it insecure. It doesn't handle the case when retpoline has been runtime disabled. Even in this case the match of the retcompile status will be enforced. This implies that even with retpoline run time disabled all modules loaded need to be recompiled. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Acked-by: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Cc: torvalds@linux-foundation.org Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- include/linux/vermagic.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 6f8fbcf10dfb..a3d04934aa96 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -24,10 +24,16 @@ #ifndef MODULE_ARCH_VERMAGIC #define MODULE_ARCH_VERMAGIC "" #endif +#ifdef RETPOLINE +#define MODULE_VERMAGIC_RETPOLINE "retpoline " +#else +#define MODULE_VERMAGIC_RETPOLINE "" +#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ - MODULE_ARCH_VERMAGIC + MODULE_ARCH_VERMAGIC \ + MODULE_VERMAGIC_RETPOLINE From 5ab44e8f0f0d6cde6f7b77eff73ca27bce499950 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Jan 2018 14:31:35 -0600 Subject: [PATCH 118/705] x86/mm/pkeys: Fix fill_sig_info_pkey commit beacd6f7ed5e2915959442245b3b2480c2e37490 upstream. SEGV_PKUERR is a signal specific si_code which happens to have the same numeric value as several others: BUS_MCEERR_AR, ILL_ILLTRP, FPE_FLTOVF, TRAP_HWBKPT, CLD_TRAPPED, POLL_ERR, SEGV_THREAD_ID, as such it is not safe to just test the si_code the signal number must also be tested to prevent a false positive in fill_sig_info_pkey. This error was by inspection, and BUS_MCEERR_AR appears to be a real candidate for confusion. So pass in si_signo and check for SIG_SEGV to verify that it is actually a SEGV_PKUERR Fixes: 019132ff3daf ("x86/mm/pkeys: Fill in pkey field in siginfo") Signed-off-by: "Eric W. Biederman" Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: Oleg Nesterov Cc: Al Viro Link: https://lkml.kernel.org/r/20180112203135.4669-2-ebiederm@xmission.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8b5ff88aa4f8..74dea7f14c20 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -191,14 +191,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) +static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, + u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; /* Fault not from Protection Keys: nothing to do */ - if (si_code != SEGV_PKUERR) + if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) return; /* * force_sig_info_fault() is called from a number of @@ -237,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, pkey); + fill_sig_info_pkey(si_signo, si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } From 02802dfc82a2bd88a359c4ac85807c4d4d08f9bb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 22 Dec 2017 00:27:55 -0500 Subject: [PATCH 119/705] x86/tsc: Fix erroneous TSC rate on Skylake Xeon commit b511203093489eb1829cb4de86e8214752205ac6 upstream. The INTEL_FAM6_SKYLAKE_X hardcoded crystal_khz value of 25MHZ is problematic: - SKX workstations (with same model # as server variants) use a 24 MHz crystal. This results in a -4.0% time drift rate on SKX workstations. - SKX servers subject the crystal to an EMI reduction circuit that reduces its actual frequency by (approximately) -0.25%. This results in -1 second per 10 minute time drift as compared to network time. This issue can also trigger a timer and power problem, on configurations that use the LAPIC timer (versus the TSC deadline timer). Clock ticks scheduled with the LAPIC timer arrive a few usec before the time they are expected (according to the slow TSC). This causes Linux to poll-idle, when it should be in an idle power saving state. The idle and clock code do not graciously recover from this error, sometimes resulting in significant polling and measurable power impact. Stop using native_calibrate_tsc() for INTEL_FAM6_SKYLAKE_X. native_calibrate_tsc() will return 0, boot will run with tsc_khz = cpu_khz, and the TSC refined calibration will update tsc_khz to correct for the difference. [ tglx: Sanitized change log ] Fixes: 6baf3d61821f ("x86/tsc: Add additional Intel CPU models to the crystal quirk list") Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: Prarit Bhargava Link: https://lkml.kernel.org/r/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 44bf5cf417d3..d07a9390023e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -693,7 +693,6 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; From 5b13f593565f98a68f8e97415a16589d9826fa79 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Fri, 17 Nov 2017 15:29:21 -0800 Subject: [PATCH 120/705] pipe: avoid round_pipe_size() nr_pages overflow on 32-bit commit d3f14c485867cfb2e0c48aa88c41d0ef4bf5209c upstream. round_pipe_size() contains a right-bit-shift expression which may overflow, which would cause undefined results in a subsequent roundup_pow_of_two() call. static inline unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; } PAGE_SIZE is defined as (1UL << PAGE_SHIFT), so: - 4 bytes wide on 32-bit (0 to 0xffffffff) - 8 bytes wide on 64-bit (0 to 0xffffffffffffffff) That means that 32-bit round_pipe_size(), nr_pages may overflow to 0: size=0x00000000 nr_pages=0x0 size=0x00000001 nr_pages=0x1 size=0xfffff000 nr_pages=0xfffff size=0xfffff001 nr_pages=0x0 << ! size=0xffffffff nr_pages=0x0 << ! This is bad because roundup_pow_of_two(n) is undefined when n == 0! 64-bit is not a problem as the unsigned int size is 4 bytes wide (similar to 32-bit) and the larger, 8 byte wide unsigned long, is sufficient to handle the largest value of the bit shift expression: size=0xffffffff nr_pages=100000 Modify round_pipe_size() to return 0 if n == 0 and updates its callers to handle accordingly. Link: http://lkml.kernel.org/r/1507658689-11669-3-git-send-email-joe.lawrence@redhat.com Signed-off-by: Joe Lawrence Reported-by: Mikulas Patocka Reviewed-by: Mikulas Patocka Cc: Al Viro Cc: Jens Axboe Cc: Michael Kerrisk Cc: Randy Dunlap Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Dong Jinguang Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 8e0d9f26dfad..9faecf1b4a27 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1018,13 +1018,19 @@ const struct file_operations pipefifo_fops = { /* * Currently we rely on the pipe array holding a power-of-2 number - * of pages. + * of pages. Returns 0 on error. */ static inline unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; + if (size < pipe_min_size) + size = pipe_min_size; + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages == 0) + return 0; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; } @@ -1040,6 +1046,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) long ret = 0; size = round_pipe_size(arg); + if (size == 0) + return -EINVAL; nr_pages = size >> PAGE_SHIFT; if (!nr_pages) @@ -1123,13 +1131,18 @@ out_revert_acct: int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, size_t *lenp, loff_t *ppos) { + unsigned int rounded_pipe_max_size; int ret; ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); if (ret < 0 || !write) return ret; - pipe_max_size = round_pipe_size(pipe_max_size); + rounded_pipe_max_size = round_pipe_size(pipe_max_size); + if (rounded_pipe_max_size == 0) + return -EINVAL; + + pipe_max_size = rounded_pipe_max_size; return ret; } From c557481a9491fe3c711ce2dd490fecac55950eab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 16 Jan 2018 12:20:18 +0100 Subject: [PATCH 121/705] x86/apic/vector: Fix off by one in error path commit 45d55e7bac4028af93f5fa324e69958a0b868e96 upstream. Keith reported the following warning: WARNING: CPU: 28 PID: 1420 at kernel/irq/matrix.c:222 irq_matrix_remove_managed+0x10f/0x120 x86_vector_free_irqs+0xa1/0x180 x86_vector_alloc_irqs+0x1e4/0x3a0 msi_domain_alloc+0x62/0x130 The reason for this is that if the vector allocation fails the error handling code tries to free the failed vector as well, which causes the above imbalance warning to trigger. Adjust the error path to handle this correctly. Fixes: b5dc8e6c21e7 ("x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors") Reported-by: Keith Busch Signed-off-by: Thomas Gleixner Tested-by: Keith Busch Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801161217300.1823@nanos Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f3557a1eb562..b5229abd1629 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -361,14 +361,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip_data = data; irq_data->hwirq = virq + i; err = assign_irq_vector_policy(virq + i, node, data, info); - if (err) + if (err) { + irq_data->chip_data = NULL; + free_apic_chip_data(data); goto error; + } } return 0; error: - x86_vector_free_irqs(domain, virq, i + 1); + x86_vector_free_irqs(domain, virq, i); return err; } From 9792f9b483cddf47ec355fbc165c763fc7c24c32 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Sun, 9 Apr 2017 20:02:37 -0700 Subject: [PATCH 122/705] perf tools: Fix build with ARCH=x86_64 commit 7a759cd8e8272ee18922838ee711219c7c796a31 upstream. With commit: 0a943cb10ce78 (tools build: Add HOSTARCH Makefile variable) when building for ARCH=x86_64, ARCH=x86_64 is passed to perf instead of ARCH=x86, so the perf build process searchs header files from tools/arch/x86_64/include, which doesn't exist. The following build failure is seen: In file included from util/event.c:2:0: tools/include/uapi/linux/mman.h:4:27: fatal error: uapi/asm/mman.h: No such file or directory compilation terminated. Fix this issue by using SRCARCH instead of ARCH in perf, just like the main kernel Makefile and tools/objtool's. Signed-off-by: Jiada Wang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Eugeniu Rosca Cc: Jan Stancek Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rui Teng Cc: Sukadev Bhattiprolu Cc: Wang Nan Fixes: 0a943cb10ce7 ("tools build: Add HOSTARCH Makefile variable") Link: http://lkml.kernel.org/r/1491793357-14977-2-git-send-email-jiada_wang@mentor.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Tuomas Tynkkynen Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.config | 38 ++++++++++++++++++------------------- tools/perf/Makefile.perf | 2 +- tools/perf/arch/Build | 2 +- tools/perf/pmu-events/Build | 4 ++-- tools/perf/tests/Build | 2 +- tools/perf/util/header.c | 2 +- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cffdd9cf3ebf..ff375310efe4 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -205,12 +205,12 @@ ifeq ($(DEBUG),0) endif CFLAGS += -I$(src-perf)/util/include -CFLAGS += -I$(src-perf)/arch/$(ARCH)/include +CFLAGS += -I$(src-perf)/arch/$(SRCARCH)/include CFLAGS += -I$(srctree)/tools/include/uapi CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -321,7 +321,7 @@ ifndef NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -346,7 +346,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PROLOGUE) else - msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); + msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); endif else msg := $(warning DWARF support is off, BPF prologue is disabled); @@ -372,7 +372,7 @@ ifdef PERF_HAVE_JITDUMP endif endif -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif @@ -453,7 +453,7 @@ else endif ifndef NO_LOCAL_LIBUNWIND - ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) msg := $(warning No debug_frame support found in libunwind); @@ -717,7 +717,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq ($(ARCH), x86) + ifneq ($(SRCARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -746,7 +746,7 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(ARCH),x86) + ifeq ($(SRCARCH),x86) ifeq ($(feature-get_cpuid), 0) msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); NO_AUXTRACE := 1 @@ -793,7 +793,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(ARCH)$(IS_64_BIT), x861) +ifeq ($(SRCARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ef52d1e3d431..2b92ffef554b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -192,7 +192,7 @@ endif ifeq ($(config),0) include $(srctree)/tools/scripts/Makefile.arch --include arch/$(ARCH)/Makefile +-include arch/$(SRCARCH)/Makefile endif # The FEATURE_DUMP_EXPORT holds location of the actual diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index 109eb75cf7de..d9b6af837c7d 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ libperf-y += common.o -libperf-y += $(ARCH)/ +libperf-y += $(SRCARCH)/ diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 9213a1273697..999a4e878162 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -2,7 +2,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o pmu-events-y += pmu-events.o -JDIR = pmu-events/arch/$(ARCH) +JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') # @@ -10,4 +10,4 @@ JSON = $(shell [ -d $(JDIR) ] && \ # directory and create tables in pmu-events.c. # $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS) - $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) + $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 8a4ce492f7b2..546250a273e7 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -71,7 +71,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ -ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc)) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5337f49db361..28bdb48357f0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -826,7 +826,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, /* * default get_cpuid(): nothing gets recorded - * actual implementation must be in arch/$(ARCH)/util/header.c + * actual implementation must be in arch/$(SRCARCH)/util/header.c */ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { From 607b86e173522b806d6721a4dba0b291178d5e4c Mon Sep 17 00:00:00 2001 From: Nir Perry Date: Thu, 11 Jan 2018 23:43:26 -0800 Subject: [PATCH 123/705] Input: ALPS - fix multi-touch decoding on SS4 plus touchpads commit 4d94e776bd29670f01befa27e12df784fa05fa2e upstream. The fix for handling two-finger scroll (i4a646580f793 - "Input: ALPS - fix two-finger scroll breakage in right side on ALPS touchpad") introduced a minor "typo" that broke decoding of multi-touch events are decoded on some ALPS touchpads. For example, tapping with three-fingers can no longer be used to emulate middle-mouse-button (the kernel doesn't recognize this as the proper event, and doesn't report it correctly to userspace). This affects touchpads that use SS4 "plus" protocol variant, like those found on Dell E7270 & E7470 laptops (tested on E7270). First, probably the code in alps_decode_ss4_v2() for case SS4_PACKET_ID_MULTI used inconsistent indices to "f->mt[]". You can see 0 & 1 are used for the "if" part but 2 & 3 are used for the "else" part. Second, in the previous patch, new macros were introduced to decode X coordinates specific to the SS4 "plus" variant, but the macro to define the maximum X value wasn't changed accordingly. The macros to decode X values for "plus" variant are effectively shifted right by 1 bit, but the max wasn't shifted too. This causes the driver to incorrectly handle "no data" cases, which also interfered with how multi-touch was handled. Fixes: 4a646580f793 ("Input: ALPS - fix two-finger scroll breakage...") Signed-off-by: Nir Perry Reviewed-by: Masaki Ota Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/alps.c | 23 +++++++++++++---------- drivers/input/mouse/alps.h | 10 ++++++---- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index f26807c75be4..af83d2e34913 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1247,29 +1247,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL; } else { f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX_BL; } + no_data_y = SS4_MFPACKET_NO_AY_BL; f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX_BL; - no_data_y = SS4_MFPACKET_NO_AY_BL; } else { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX; } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_STD_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX; } + no_data_y = SS4_MFPACKET_NO_AY; + f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX; - no_data_y = SS4_MFPACKET_NO_AY; } f->first_mp = 0; diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index 793123717145..9bc2babd9256 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -120,10 +120,12 @@ enum SS4_PACKET_ID { #define SS4_IS_5F_DETECTED(_b) ((_b[2] & 0x10) == 0x10) -#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ -#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ -#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */ -#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ +#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */ +#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */ +#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */ +#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */ /* * enum V7_PACKET_ID - defines the packet type for V7 From 9be13b3357e12398bac998725437be64f0acc6fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:20:18 -0800 Subject: [PATCH 124/705] Input: 88pm860x-ts - fix child-node lookup commit 906bf7daa0618d0ef39f4872ca42218c29a3631f upstream. Fix child node-lookup during probe, which ended up searching the whole device tree depth-first starting at parent rather than just matching on its children. To make things worse, the parent node was prematurely freed, while the child node was leaked. Fixes: 2e57d56747e6 ("mfd: 88pm860x: Device tree support") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/88pm860x-ts.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c index 251ff2aa0633..7a0dbce4dae9 100644 --- a/drivers/input/touchscreen/88pm860x-ts.c +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, int data, n, ret; if (!np) return -ENODEV; - np = of_find_node_by_name(np, "touch"); + np = of_get_child_by_name(np, "touch"); if (!np) { dev_err(&pdev->dev, "Can't find touch node\n"); return -EINVAL; @@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set tsi prebias time */ if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) { ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set prebias & prechg time of pen detect */ data = 0; @@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x); + + of_node_put(np); + return 0; + +err_put_node: + of_node_put(np); + + return -EINVAL; } #else #define pm860x_touch_dt_init(x, y, z) (-1) From eaabab6468b395e5d5fb66d7fd3363f96de3e61f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:17:48 -0800 Subject: [PATCH 125/705] Input: twl6040-vibra - fix child-node lookup commit dcaf12a8b0bbdbfcfa2be8dff2c4948d9844b4ad upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at parent rather than just matching on its children. Later sanity checks on node properties (which would likely be missing) should prevent this from causing much trouble however, especially as the original premature free of the parent node has already been fixed separately (but that "fix" was apparently never backported to stable). Fixes: e7ec014a47e4 ("Input: twl6040-vibra - update for device tree support") Fixes: c52c545ead97 ("Input: twl6040-vibra - fix DT node memory management") Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Tested-by: H. Nikolaus Schaller (on Pyra OMAP5 hardware) Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/twl6040-vibra.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 5690eb7ff954..15e0d352c4cc 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev) int vddvibr_uV = 0; int error; - of_node_get(twl6040_core_dev->of_node); - twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node, + twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node, "vibra"); if (!twl6040_core_node) { dev_err(&pdev->dev, "parent of node is missing?\n"); From cb513d1414f910f3c30781cd5491ca57db2cae2f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:15:06 -0800 Subject: [PATCH 126/705] Input: twl4030-vibra - fix sibling-node lookup commit 5b189201993ab03001a398de731045bfea90c689 upstream. A helper purported to look up a child node based on its name was using the wrong of-helper and ended up prematurely freeing the parent of-node while searching the whole device tree depth-first starting at the parent node. Fixes: 64b9e4d803b1 ("input: twl4030-vibra: Support for DT booted kernel") Fixes: e661d0a04462 ("Input: twl4030-vibra - fix ERROR: Bad of_node_put() warning") Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/twl4030-vibra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index caa5a62c42fb..15929d862459 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->coexist) return true; - node = of_find_node_by_name(node, "codec"); + node = of_get_child_by_name(parent, "codec"); if (node) { of_node_put(node); return true; From 9a50ea0ce7cc2f3b0115942aa313fcbce7f5183a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 18 Jan 2018 15:53:10 -0500 Subject: [PATCH 127/705] tracing: Fix converting enum's from the map in trace_event_eval_update() commit 1ebe1eaf2f02784921759992ae1fde1a9bec8fd0 upstream. Since enums do not get converted by the TRACE_EVENT macro into their values, the event format displaces the enum name and not the value. This breaks tools like perf and trace-cmd that need to interpret the raw binary data. To solve this, an enum map was created to convert these enums into their actual numbers on boot up. This is done by TRACE_EVENTS() adding a TRACE_DEFINE_ENUM() macro. Some enums were not being converted. This was caused by an optization that had a bug in it. All calls get checked against this enum map to see if it should be converted or not, and it compares the call's system to the system that the enum map was created under. If they match, then they call is processed. To cut down on the number of iterations needed to find the maps with a matching system, since calls and maps are grouped by system, when a match is made, the index into the map array is saved, so that the next call, if it belongs to the same system as the previous call, could start right at that array index and not have to scan all the previous arrays. The problem was, the saved index was used as the variable to know if this is a call in a new system or not. If the index was zero, it was assumed that the call is in a new system and would keep incrementing the saved index until it found a matching system. The issue arises when the first matching system was at index zero. The next map, if it belonged to the same system, would then think it was the first match and increment the index to one. If the next call belong to the same system, it would begin its search of the maps off by one, and miss the first enum that should be converted. This left a single enum not converted properly. Also add a comment to describe exactly what that index was for. It took me a bit too long to figure out what I was thinking when debugging this issue. Link: http://lkml.kernel.org/r/717BE572-2070-4C1E-9902-9F2E0FEDA4F8@oracle.com Fixes: 0c564a538aa93 ("tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values") Reported-by: Chuck Lever Teste-by: Chuck Lever Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 03c0a48c3ac4..9549ed120556 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2200,6 +2200,7 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; + bool first = false; int last_i; int i; @@ -2207,15 +2208,28 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { + first = true; last_i = 0; last_system = call->class->system; } + /* + * Since calls are grouped by systems, the likelyhood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip seaching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. + */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ - if (!last_i) + if (first) { last_i = i; + first = false; + } update_event_printk(call, map[i]); } } From 969e2145eb4a069a60e8b090aa7e8b8aa0efc365 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jan 2018 11:12:05 +0100 Subject: [PATCH 128/705] phy: work around 'phys' references to usb-nop-xceiv devices commit b7563e2796f8b23c98afcfea7363194227fa089d upstream. Stefan Wahren reports a problem with a warning fix that was merged for v4.15: we had lots of device nodes with a 'phys' property pointing to a device node that is not compliant with the binding documented in Documentation/devicetree/bindings/phy/phy-bindings.txt This generally works because USB HCD drivers that support both the generic phy subsystem and the older usb-phy subsystem ignore most errors from phy_get() and related calls and then use the usb-phy driver instead. However, it turns out that making the usb-nop-xceiv device compatible with the generic-phy binding changes the phy_get() return code from -EINVAL to -EPROBE_DEFER, and the dwc2 usb controller driver for bcm2835 now returns -EPROBE_DEFER from its probe function rather than ignoring the failure, breaking all USB support on raspberry-pi when CONFIG_GENERIC_PHY is enabled. The same code is used in the dwc3 driver and the usb_add_hcd() function, so a reasonable assumption would be that many other platforms are affected as well. I have reviewed all the related patches and concluded that "usb-nop-xceiv" is the only USB phy that is affected by the change, and since it is by far the most commonly referenced phy, all the other USB phy drivers appear to be used in ways that are are either safe in DT (they don't use the 'phys' property), or in the driver (they already ignore -EPROBE_DEFER from generic-phy when usb-phy is available). To work around the problem, this adds a special case to _of_phy_get() so we ignore any PHY node that is compatible with "usb-nop-xceiv", as we know that this can never load no matter how much we defer. In the future, we might implement a generic-phy driver for "usb-nop-xceiv" and then remove this workaround. Since we generally want older kernels to also want to work with the fixed devicetree files, it would be good to backport the patch into stable kernels as well (3.13+ are possibly affected), even though they don't contain any of the patches that may have caused regressions. Fixes: 014d6da6cb25 ARM: dts: bcm283x: Fix DTC warnings about missing phy-cells Fixes: c5bbf358b790 arm: dts: nspire: Add missing #phy-cells to usb-nop-xceiv Fixes: 44e5dced2ef6 arm: dts: marvell: Add missing #phy-cells to usb-nop-xceiv Fixes: f568f6f554b8 ARM: dts: omap: Add missing #phy-cells to usb-nop-xceiv Fixes: d745d5f277bf ARM: dts: imx51-zii-rdu1: Add missing #phy-cells to usb-nop-xceiv Fixes: 915fbe59cbf2 ARM: dts: imx: Add missing #phy-cells to usb-nop-xceiv Link: https://marc.info/?l=linux-usb&m=151518314314753&w=2 Link: https://patchwork.kernel.org/patch/10158145/ Cc: Felipe Balbi Cc: Eric Anholt Tested-by: Stefan Wahren Acked-by: Rob Herring Tested-by: Hans Verkuil Acked-by: Kishon Vijay Abraham I Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index a268f4d6f3e9..48a365e303e5 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -395,6 +395,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index) if (ret) return ERR_PTR(-ENODEV); + /* This phy type handled by the usb-phy subsystem for now */ + if (of_device_is_compatible(args.np, "usb-nop-xceiv")) + return ERR_PTR(-ENODEV); + mutex_lock(&phy_provider_mutex); phy_provider = of_phy_provider_lookup(args.np); if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) { From 1f32f15ec73c76e5136ca9c42bc26c9526700c44 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 19 Jan 2018 14:32:08 +0100 Subject: [PATCH 129/705] ARM: sunxi_defconfig: Enable CMA commit c13e7f313da33d1488355440f1a10feb1897480a upstream. The DRM driver most notably, but also out of tree drivers (for now) like the VPU or GPU drivers, are quite big consumers of large, contiguous memory buffers. However, the sunxi_defconfig doesn't enable CMA in order to mitigate that, which makes them almost unusable. Enable it to make sure it somewhat works. Signed-off-by: Maxime Ripard Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/sunxi_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 714da336ec86..5d49b60841e3 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -11,6 +11,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_FREQ=y @@ -35,6 +36,7 @@ CONFIG_CAN_SUN4I=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_AHCI_SUNXI=y From 19f47eafe10c2ced548c381970e2cf7b3db77ba7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 4 Jan 2018 17:53:12 +0100 Subject: [PATCH 130/705] ARM: dts: kirkwood: fix pin-muxing of MPP7 on OpenBlocks A7 commit 56aeb07c914a616ab84357d34f8414a69b140cdf upstream. MPP7 is currently muxed as "gpio", but this function doesn't exist for MPP7, only "gpo" is available. This causes the following error: kirkwood-pinctrl f1010000.pin-controller: unsupported function gpio on pin mpp7 pinctrl core: failed to register map default (6): invalid type given kirkwood-pinctrl f1010000.pin-controller: error claiming hogs: -22 kirkwood-pinctrl f1010000.pin-controller: could not claim hogs: -22 kirkwood-pinctrl f1010000.pin-controller: unable to register pinctrl driver kirkwood-pinctrl: probe of f1010000.pin-controller failed with error -22 So the pinctrl driver is not probed, all device drivers (including the UART driver) do a -EPROBE_DEFER, and therefore the system doesn't really boot (well, it boots, but with no UART, and no devices that require pin-muxing). Back when the Device Tree file for this board was introduced, the definition was already wrong. The pinctrl driver also always described as "gpo" this function for MPP7. However, between Linux 4.10 and 4.11, a hog pin failing to be muxed was turned from a simple warning to a hard error that caused the entire pinctrl driver probe to bail out. This is probably the result of commit 6118714275f0a ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()"). This commit fixes the Device Tree to use the proper "gpo" function for MPP7, which fixes the boot of OpenBlocks A7, which was broken since Linux 4.11. Fixes: f24b56cbcd9d ("ARM: kirkwood: add support for OpenBlocks A7 platform") Signed-off-by: Thomas Petazzoni Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/kirkwood-openblocks_a7.dts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts index cf2f5240e176..27cc913ca0f5 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts @@ -53,7 +53,8 @@ }; pinctrl: pin-controller@10000 { - pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>; + pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header + &pmx_gpio_header_gpo>; pinctrl-names = "default"; pmx_uart0: pmx-uart0 { @@ -85,11 +86,16 @@ * ground. */ pmx_gpio_header: pmx-gpio-header { - marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28", + marvell,pins = "mpp17", "mpp29", "mpp28", "mpp35", "mpp34", "mpp40"; marvell,function = "gpio"; }; + pmx_gpio_header_gpo: pxm-gpio-header-gpo { + marvell,pins = "mpp7"; + marvell,function = "gpo"; + }; + pmx_gpio_init: pmx-init { marvell,pins = "mpp38"; marvell,function = "gpio"; From 23d68eddd857d9bbc68ebd8d22255999c155823a Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Mon, 15 Jan 2018 16:31:19 +0100 Subject: [PATCH 131/705] can: peak: fix potential bug in packet fragmentation commit d8a243af1a68395e07ac85384a2740d4134c67f4 upstream. In some rare conditions when running one PEAK USB-FD interface over a non high-speed USB controller, one useless USB fragment might be sent. This patch fixes the way a USB command is fragmented when its length is greater than 64 bytes and when the underlying USB controller is not a high-speed one. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 304732550f0a..7f5ec40e2b4d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) void *cmd_head = pcan_usb_fd_cmd_buffer(dev); int err = 0; u8 *packet_ptr; - int i, n = 1, packet_len; + int packet_len; ptrdiff_t cmd_len; /* usb device unregistered? */ @@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr = cmd_head; + packet_len = cmd_len; /* firmware is not able to re-assemble 512 bytes buffer in full-speed */ - if ((dev->udev->speed != USB_SPEED_HIGH) && - (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) { - packet_len = PCAN_UFD_LOSPD_PKT_SIZE; - n += cmd_len / packet_len; - } else { - packet_len = cmd_len; - } + if (unlikely(dev->udev->speed != USB_SPEED_HIGH)) + packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE); - for (i = 0; i < n; i++) { + do { err = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, PCAN_USBPRO_EP_CMDOUT), @@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr += packet_len; - } + cmd_len -= packet_len; + + if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE) + packet_len = cmd_len; + + } while (packet_len > 0); return err; } From 43c3e093c26d7707b36241d579a35eaa0f4cd91f Mon Sep 17 00:00:00 2001 From: Xi Kangjie Date: Thu, 18 Jan 2018 16:34:00 -0800 Subject: [PATCH 132/705] scripts/gdb/linux/tasks.py: fix get_thread_info commit 883d50f56d263f70fd73c0d96b09eb36c34e9305 upstream. Since kernel 4.9, the thread_info has been moved into task_struct, no longer locates at the bottom of kernel stack. See commits c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") and 15f4eae70d36 ("x86: Move thread_info into task_struct"). Before fix: (gdb) set $current = $lx_current() (gdb) p $lx_thread_info($current) $1 = {flags = 1470918301} (gdb) p $current.thread_info $2 = {flags = 2147483648} After fix: (gdb) p $lx_thread_info($current) $1 = {flags = 2147483648} (gdb) p $current.thread_info $2 = {flags = 2147483648} Link: http://lkml.kernel.org/r/20180118210159.17223-1-imxikangjie@gmail.com Fixes: 15f4eae70d36 ("x86: Move thread_info into task_struct") Signed-off-by: Xi Kangjie Acked-by: Jan Kiszka Acked-by: Kieran Bingham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/gdb/linux/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index 1bf949c43b76..f6ab3ccf698f 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -96,6 +96,8 @@ def get_thread_info(task): thread_info_addr = task.address + ia64_task_size thread_info = thread_info_addr.cast(thread_info_ptr_type) else: + if task.type.fields()[0].type == thread_info_type.get_type(): + return task['thread_info'] thread_info = task['stack'].cast(thread_info_ptr_type) return thread_info.dereference() From 8a3f4baaa4c3fa73821c775d684bdc0120ef8e59 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Jan 2018 16:34:05 -0800 Subject: [PATCH 133/705] proc: fix coredump vs read /proc/*/stat race commit 8bb2ee192e482c5d500df9f2b1b26a560bd3026f upstream. do_task_stat() accesses IP and SP of a task without bumping reference count of a stack (which became an entity with independent lifetime at some point). Steps to reproduce: #include #include #include #include #include #include #include #include int main(void) { setrlimit(RLIMIT_CORE, &(struct rlimit){}); while (1) { char buf[64]; char buf2[4096]; pid_t pid; int fd; pid = fork(); if (pid == 0) { *(volatile int *)0 = 0; } snprintf(buf, sizeof(buf), "/proc/%u/stat", pid); fd = open(buf, O_RDONLY); read(fd, buf2, sizeof(buf2)); close(fd); waitpid(pid, NULL, 0); } return 0; } BUG: unable to handle kernel paging request at 0000000000003fd8 IP: do_task_stat+0x8b4/0xaf0 PGD 800000003d73e067 P4D 800000003d73e067 PUD 3d558067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 1417 Comm: a.out Not tainted 4.15.0-rc8-dirty #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 RIP: 0010:do_task_stat+0x8b4/0xaf0 Call Trace: proc_single_show+0x43/0x70 seq_read+0xe6/0x3b0 __vfs_read+0x1e/0x120 vfs_read+0x84/0x110 SyS_read+0x3d/0xa0 entry_SYSCALL_64_fastpath+0x13/0x6c RIP: 0033:0x7f4d7928cba0 RSP: 002b:00007ffddb245158 EFLAGS: 00000246 Code: 03 b7 a0 01 00 00 4c 8b 4c 24 70 4c 8b 44 24 78 4c 89 74 24 18 e9 91 f9 ff ff f6 45 4d 02 0f 84 fd f7 ff ff 48 8b 45 40 48 89 ef <48> 8b 80 d8 3f 00 00 48 89 44 24 20 e8 9b 97 eb ff 48 89 44 24 RIP: do_task_stat+0x8b4/0xaf0 RSP: ffffc90000607cc8 CR2: 0000000000003fd8 John Ogness said: for my tests I added an else case to verify that the race is hit and correctly mitigated. Link: http://lkml.kernel.org/r/20180116175054.GA11513@avx2 Signed-off-by: Alexey Dobriyan Reported-by: "Kohli, Gaurav" Tested-by: John Ogness Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index c932ec454625..794b52a6c20d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -423,8 +423,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * safe because the task has stopped executing permanently. */ if (permitted && (task->flags & PF_DUMPCORE)) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); + if (try_get_task_stack(task)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + put_task_stack(task); + } } } From d314f3bc7f3d0853e38364c95b1d30fb4e33e989 Mon Sep 17 00:00:00 2001 From: Xinyu Lin Date: Sun, 17 Dec 2017 20:13:39 +0800 Subject: [PATCH 134/705] libata: apply MAX_SEC_1024 to all LITEON EP1 series devices commit db5ff909798ef0099004ad50a0ff5fde92426fd1 upstream. LITEON EP1 has the same timeout issues as CX1 series devices. Revert max_sectors to the value of 1024. Fixes: e0edc8c54646 ("libata: apply MAX_SEC_1024 to all CX1-JB*-HP devices") Signed-off-by: Xinyu Lin Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 33e363dcc63b..aee39524375c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4322,6 +4322,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ From ca2d736867200b931ca61383af2fd68bb5fd2ecb Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 11 Jan 2018 09:53:35 +0900 Subject: [PATCH 135/705] workqueue: avoid hard lockups in show_workqueue_state() commit 62635ea8c18f0f62df4cc58379e4f1d33afd5801 upstream. show_workqueue_state() can print out a lot of messages while being in atomic context, e.g. sysrq-t -> show_workqueue_state(). If the console device is slow it may end up triggering NMI hard lockup watchdog. Signed-off-by: Sergey Senozhatsky Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 181c2ad0cb54..ebfea5f94b66 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -4424,6 +4425,12 @@ void show_workqueue_state(void) if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } } @@ -4451,6 +4458,12 @@ void show_workqueue_state(void) pr_cont("\n"); next_pool: spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } rcu_read_unlock_sched(); From cabf6294a6dc615324f8269b5839de2020dbdaf3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Dec 2017 09:56:06 +0000 Subject: [PATCH 136/705] dm btree: fix serious bug in btree_split_beneath() commit bc68d0a43560e950850fc69b58f0f8254b28f6d6 upstream. When inserting a new key/value pair into a btree we walk down the spine of btree nodes performing the following 2 operations: i) space for a new entry ii) adjusting the first key entry if the new key is lower than any in the node. If the _root_ node is full, the function btree_split_beneath() allocates 2 new nodes, and redistibutes the root nodes entries between them. The root node is left with 2 entries corresponding to the 2 new nodes. btree_split_beneath() then adjusts the spine to point to one of the two new children. This means the first key is never adjusted if the new key was lower, ie. operation (ii) gets missed out. This can result in the new key being 'lost' for a period; until another low valued key is inserted that will uncover it. This is a serious bug, and quite hard to make trigger in normal use. A reproducing test case ("thin create devices-in-reverse-order") is available as part of the thin-provision-tools project: https://github.com/jthornber/thin-provisioning-tools/blob/master/functional-tests/device-mapper/dm-tests.scm#L593 Fix the issue by changing btree_split_beneath() so it no longer adjusts the spine. Instead it unlocks both the new nodes, and lets the main loop in btree_insert_raw() relock the appropriate one and make any neccessary adjustments. Reported-by: Monty Pavel Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 7a75b5010f73..e4ececd3df00 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -678,23 +678,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) pn->keys[1] = rn->keys[0]; memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - /* - * rejig the spine. This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - + unlock_block(s->info, left); + unlock_block(s->info, right); return 0; } From 2904adc5b1c08894f954bff9e30eb5facb3b6591 Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Tue, 12 Dec 2017 18:21:40 +0800 Subject: [PATCH 137/705] dm thin metadata: THIN_MAX_CONCURRENT_LOCKS should be 6 commit 490ae017f54e55bde382d45ea24bddfb6d1a0aaf upstream. For btree removal, there is a corner case that a single thread could takes 6 locks which is more than THIN_MAX_CONCURRENT_LOCKS(5) and leads to deadlock. A btree removal might eventually call rebalance_children()->rebalance3() to rebalance entries of three neighbor child nodes when shadow_spine has already acquired two write locks. In rebalance3(), it tries to shadow and acquire the write locks of all three child nodes. However, shadowing a child node requires acquiring a read lock of the original child node and a write lock of the new block. Although the read lock will be released after block shadowing, shadowing the third child node in rebalance3() could still take the sixth lock. (2 write locks for shadow_spine + 2 write locks for the first two child nodes's shadow + 1 write lock for the last child node's shadow + 1 read lock for the last child node) Signed-off-by: Dennis Yang Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4477bf930cf4..e976f4f39334 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -81,10 +81,14 @@ #define SECTOR_TO_BLOCK_SHIFT 3 /* + * For btree insert: * 3 for btree insert + * 2 for btree lookup used within space map + * For btree remove: + * 2 for shadow spine + + * 4 for rebalance 3 child node */ -#define THIN_MAX_CONCURRENT_LOCKS 5 +#define THIN_MAX_CONCURRENT_LOCKS 6 /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 From ddfaa7acd7a27355200457eb56767cf5fcafc1d0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Jan 2018 10:23:47 +0000 Subject: [PATCH 138/705] arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls commit acfb3b883f6d6a4b5d27ad7fdded11f6a09ae6dd upstream. KVM doesn't follow the SMCCC when it comes to unimplemented calls, and inject an UNDEF instead of returning an error. Since firmware calls are now used for security mitigation, they are becoming more common, and the undef is counter productive. Instead, let's follow the SMCCC which states that -1 must be returned to the caller when getting an unknown function number. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/handle_exit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 85baadab02d3..2e6e9e99977b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -44,7 +44,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -53,7 +53,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } From 0d92cf7f29e6ea8565546c5685b34e633300d8e8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Dec 2017 23:43:54 -0600 Subject: [PATCH 139/705] x86/cpu, x86/pti: Do not enable PTI on AMD processors commit 694d99d40972f12e59a3696effee8a376b79d7c8 upstream. AMD processors are not subject to the types of attacks that the kernel page table isolation feature protects against. The AMD microarchitecture does not allow memory references, including speculative references, that access higher privileged data when running in a lesser privileged mode when that access would result in a page fault. Disable page table isolation by default on AMD processors by not setting the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI is set. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net Cc: Nick Lowe Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b9ae04ddf5d..d198ae02f2b7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -883,8 +883,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - /* Assume for now that ALL x86 CPUs are insecure */ - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + if (c->x86_vendor != X86_VENDOR_AMD) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); From 87ac29717de8abaa3199eda5ef3c04e2924a6fdc Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 5 Dec 2016 12:56:38 -0700 Subject: [PATCH 140/705] usbip: fix warning in vhci_hcd_probe/lockdep_init_map commit 918b8ac55b6c809b70aa05c279087109584e393e upstream. vhci_hcd calls sysfs_create_group() with dynamically allocated sysfs attributes triggering the lock-class key not persistent warning. Call sysfs_attr_init() for dynamically allocated sysfs attributes to fix it. vhci_hcd vhci_hcd: USB/IP Virtual Host Controller vhci_hcd vhci_hcd: new USB bus registered, assigned bus number 2 BUG: key ffff88006a7e8d18 not in .data! ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3131 lockdep_init_map+0x60c/0x770 DEBUG_LOCKS_WARN_ON(1)[ 1.567044] Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc7+ #58 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006bce6eb8 ffffffff81f96c8a ffffffff00000a02 1ffff1000d79cd6a ffffed000d79cd62 000000046bce6ed8 0000000041b58ab3 ffffffff8598af40 ffffffff81f969f8 0000000000000000 0000000041b58ab3 0000000000000200 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] __warn+0x19f/0x1e0 kernel/panic.c:550 [] warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:565 [] lockdep_init_map+0x60c/0x770 kernel/locking/lockdep.c:3131 [] __kernfs_create_file+0x114/0x2a0 fs/kernfs/file.c:954 [] sysfs_add_file_mode_ns+0x225/0x520 fs/sysfs/file.c:305 [< inline >] create_files fs/sysfs/group.c:64 [] internal_create_group+0x239/0x8f0 fs/sysfs/group.c:134 [] sysfs_create_group+0x1f/0x30 fs/sysfs/group.c:156 [] vhci_start+0x5b4/0x7a0 drivers/usb/usbip/vhci_hcd.c:978 [] usb_add_hcd+0x8da/0x1c60 drivers/usb/core/hcd.c:2867 [] vhci_hcd_probe+0x97/0x130 drivers/usb/usbip/vhci_hcd.c:1103 --- --- ---[ end trace c33c7b202cf3aac8 ]--- Signed-off-by: Shuah Khan Reported-by: Andrey Konovalov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index c404017c1b5a..b96e5b189269 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -361,6 +361,7 @@ static void set_status_attr(int id) status->attr.attr.name = status->name; status->attr.attr.mode = S_IRUGO; status->attr.show = status_show; + sysfs_attr_init(&status->attr.attr); } static int init_status_attrs(void) From c5aa687060a85d849574ad014639f93b1dc98ea0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Jan 2018 16:28:26 +0100 Subject: [PATCH 141/705] x86/mce: Make machine check speculation protected commit 6f41c34d69eb005e7848716bbcafc979b35037d5 upstream. The machine check idtentry uses an indirect branch directly from the low level code. This evades the speculation protection. Replace it by a direct call into C code and issue the indirect call there so the compiler can apply the proper speculation protection. Signed-off-by: Thomas Gleixner Reviewed-by:Borislav Petkov Reviewed-by: David Woodhouse Niced-by: Peter Zijlstra Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801181626290.1847@nanos Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/traps.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3a15023bdf1a..e729e1528584 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1064,7 +1064,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 #endif #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 01fd0a7f48cd..688315b7a922 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -92,6 +92,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif +dotraplinkage void do_mce(struct pt_regs *, long); static inline int get_si_code(unsigned long condition) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ca5f8ad008e..fe5cd6ea1f0e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1754,6 +1754,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: From 09402d83395f6b377841bcf9460aeee37501486d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:14:21 +0900 Subject: [PATCH 142/705] retpoline: Introduce start/end markers of indirect thunk commit 736e80a4213e9bbce40a7c050337047128b472ac upstream. Introduce start/end markers of __x86_indirect_thunk_* functions. To make it easy, consolidate .text.__x86.indirect_thunk.* sections to one .text.__x86.indirect_thunk section and put it in the end of kernel text section and adds __indirect_thunk_start/end so that other subsystem (e.g. kprobes) can identify it. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 +++ arch/x86/kernel/vmlinux.lds.S | 7 +++++++ arch/x86/lib/retpoline.S | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7b45d8424150..19ba5ad19c65 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + /* * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index dbf67f64d5ec..c7194e97c3d4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -105,6 +105,13 @@ SECTIONS SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; + *(.text.__x86.indirect_thunk) + __indirect_thunk_end = .; +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cb45c6cb465f..d3415dc30f82 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,7 +9,7 @@ #include .macro THUNK reg - .section .text.__x86.indirect_thunk.\reg + .section .text.__x86.indirect_thunk ENTRY(__x86_indirect_thunk_\reg) CFI_STARTPROC From 36ad6ba501d626f5bac505e6f70ffc54292b76d9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:14:51 +0900 Subject: [PATCH 143/705] kprobes/x86: Blacklist indirect thunk functions for kprobes commit c1804a236894ecc942da7dc6c5abe209e56cba93 upstream. Mark __x86_indirect_thunk_* functions as blacklist for kprobes because those functions can be called from anywhere in the kernel including blacklist functions of kprobes. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index d3415dc30f82..dfb2ba91b670 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) * than one per register with the correct names. So we do it * the simple and nasty way... */ -#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) GENERATE_THUNK(_ASM_AX) From 4b71be496642d3c7908a0e40a3073ce5bcf87330 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:15:20 +0900 Subject: [PATCH 144/705] kprobes/x86: Disable optimizing on the function jumps to indirect thunk commit c86a32c09f8ced67971a2310e3b0dda4d1749007 upstream. Since indirect jump instructions will be replaced by jump to __x86_indirect_thunk_*, those jmp instruction must be treated as an indirect jump. Since optprobe prohibits to optimize probes in the function which uses an indirect jump, it also needs to find out the function which jump to __x86_indirect_thunk_* and disable optimization. Add a check that the jump target address is between the __indirect_thunk_start/end when optimizing kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4d74f7386a61..dc20da1c78f0 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "common.h" @@ -192,7 +193,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int insn_is_indirect_jump(struct insn *insn) +static int __insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -226,6 +227,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } +static int insn_is_indirect_jump(struct insn *insn) +{ + int ret = __insn_is_indirect_jump(insn); + +#ifdef CONFIG_RETPOLINE + /* + * Jump to x86_indirect_thunk_* is treated as an indirect jump. + * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with + * older gcc may use indirect jump. So we add this check instead of + * replace indirect-jump check. + */ + if (!ret) + ret = insn_jump_into_range(insn, + (unsigned long)__indirect_thunk_start, + (unsigned long)__indirect_thunk_end - + (unsigned long)__indirect_thunk_start); +#endif + return ret; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { From b9f8b5935394362e68147cb2792c5f74ff3fc4ca Mon Sep 17 00:00:00 2001 From: "zhenwei.pi" Date: Thu, 18 Jan 2018 09:04:52 +0800 Subject: [PATCH 145/705] x86/pti: Document fix wrong index commit 98f0fceec7f84d80bc053e49e596088573086421 upstream. In section <2. Runtime Cost>, fix wrong index. Signed-off-by: zhenwei.pi Signed-off-by: Thomas Gleixner Cc: dave.hansen@linux.intel.com Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/pti.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt index d11eff61fc9a..5cd58439ad2d 100644 --- a/Documentation/x86/pti.txt +++ b/Documentation/x86/pti.txt @@ -78,7 +78,7 @@ this protection comes at a cost: non-PTI SYSCALL entry code, so requires mapping fewer things into the userspace page tables. The downside is that stacks must be switched at entry time. - d. Global pages are disabled for all kernel structures not + c. Global pages are disabled for all kernel structures not mapped into both kernel and userspace page tables. This feature of the MMU allows different processes to share TLB entries mapping the kernel. Losing the feature means more From 06d7342d8498b7426ab50368436417cdf6ed1923 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 17 Jan 2018 14:53:28 -0800 Subject: [PATCH 146/705] x86/retpoline: Optimize inline assembler for vmexit_fill_RSB commit 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 upstream. The generated assembler for the C fill RSB inline asm operations has several issues: - The C code sets up the loop register, which is then immediately overwritten in __FILL_RETURN_BUFFER with the same value again. - The C code also passes in the iteration count in another register, which is not used at all. Remove these two unnecessary operations. Just rely on the single constant passed to the macro for the iterations. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: dave.hansen@intel.com Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180117225328.15414-1-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ba5ad19c65..4ad41087ce0e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -206,16 +206,17 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops = RSB_CLEAR_LOOPS / 2; + unsigned long loops; asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE("jmp 910f", __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), X86_FEATURE_RETPOLINE) "910:" - : "=&r" (loops), ASM_CALL_CONSTRAINT - : "r" (loops) : "memory" ); + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ From 60249fe9050bad1bd814dfb1c4ed5fc9155ef5cb Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 29 Oct 2017 16:27:21 +0100 Subject: [PATCH 147/705] MIPS: AR7: ensure the port type's FCR value is used commit 0a5191efe06b5103909206e4fbcff81d30283f8e upstream. Since commit aef9a7bd9b67 ("serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers"), the port's default FCR value isn't used in serial8250_do_set_termios anymore, but copied over once in serial8250_config_port and then modified as needed. Unfortunately, serial8250_config_port will never be called if the port is shared between kernel and userspace, and the port's flag doesn't have UPF_BOOT_AUTOCONF, which would trigger a serial8250_config_port as well. This causes garbled output from userspace: [ 5.220000] random: procd urandom read with 49 bits of entropy available ers [kee Fix this by forcing it to be configured on boot, resulting in the expected output: [ 5.250000] random: procd urandom read with 50 bits of entropy available Press the [f] key and hit [enter] to enter failsafe mode Press the [1], [2], [3] or [4] key and hit [enter] to select the debug level Fixes: aef9a7bd9b67 ("serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers") Signed-off-by: Jonas Gorski Cc: Greg Kroah-Hartman Cc: Yoshihiro YUNOMAE Cc: Florian Fainelli Cc: Nicolas Schichan Cc: linux-mips@linux-mips.org Cc: linux-serial@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17544/ Signed-off-by: Ralf Baechle Cc: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/ar7/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 3446b6fb3acb..9da4e2292fc7 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -576,7 +576,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE; + uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; uart_port.regshift = 2; uart_port.line = 0; From 79584a4221253611a4d033087f373b046350df9f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 23 Jan 2018 19:57:10 +0100 Subject: [PATCH 148/705] Linux 4.9.78 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aba553531d6a..8a6f158a1176 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 77 +SUBLEVEL = 78 EXTRAVERSION = NAME = Roaring Lionus From c36c940cd4aab393c09d457e2414423ac92bd339 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Dec 2016 10:24:05 -0800 Subject: [PATCH 149/705] x86/asm/32: Make sync_core() handle missing CPUID on all 32-bit kernels commit 1c52d859cb2d417e7216d3e56bb7fea88444cec9 upstream. We support various non-Intel CPUs that don't have the CPUID instruction, so the M486 test was wrong. For now, fix it with a big hammer: handle missing CPUID on all 32-bit CPUs. Reported-by: One Thousand Gnomes Signed-off-by: Andy Lutomirski Cc: Juergen Gross Cc: Peter Zijlstra Cc: Brian Gerst Cc: Matthew Whitehead Cc: Borislav Petkov Cc: Henrique de Moraes Holschuh Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: xen-devel Link: http://lkml.kernel.org/r/685bd083a7c036f7769510b6846315b17d6ba71f.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner Cc: "Zhang, Ning A" Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e40b19ca486e..353f038ec645 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -596,7 +596,7 @@ static inline void sync_core(void) { int tmp; -#ifdef CONFIG_M486 +#ifdef CONFIG_X86_32 /* * Do a CPUID if available, otherwise do a jump. The jump * can conveniently enough be the jump around CPUID. From fb39345e73141e4ae4529168965c37024cb3acb3 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 22 Jan 2018 15:44:51 -0500 Subject: [PATCH 150/705] orangefs: use list_for_each_entry_safe in purge_waiting_ops commit 0afc0decf247f65b7aba666a76a0a68adf4bc435 upstream. set_op_state_purged can delete the op. Signed-off-by: Martin Brandenburg Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/waitqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index abcfa3fa9992..f61b00887481 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -28,10 +28,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s */ void purge_waiting_ops(void) { - struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); - list_for_each_entry(op, &orangefs_request_list, list) { + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), From 5c26ee198fcacda157918a9b211ba1f111e1ed9b Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 22 Jan 2018 15:44:52 -0500 Subject: [PATCH 151/705] orangefs: initialize op on loop restart in orangefs_devreq_read commit a0ec1ded22e6a6bc41981fae22406835b006a66e upstream. In orangefs_devreq_read, there is a loop which picks an op off the list of pending ops. If the loop fails to find an op, there is nothing to read, and it returns EAGAIN. If the op has been given up on, the loop is restarted via a goto. The bug is that the variable which the found op is written to is not reinitialized, so if there are no more eligible ops on the list, the code runs again on the already handled op. This is triggered by interrupting a process while the op is being copied to the client-core. It's a fairly small window, but it's there. Signed-off-by: Martin Brandenburg Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/devorangefs-req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index fe2cbeb90772..939aa066e1ca 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -161,7 +161,7 @@ static ssize_t orangefs_devreq_read(struct file *file, struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - struct orangefs_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ @@ -181,6 +181,7 @@ static ssize_t orangefs_devreq_read(struct file *file, } restart: + cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { From ce601a07bc504b4748f8e7a34896684f79514e51 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:49 -0700 Subject: [PATCH 152/705] usbip: prevent vhci_hcd driver from leaking a socket pointer address commit 2f2d0088eb93db5c649d2a5e34a3800a8a935fc5 upstream. When a client has a USB device attached over IP, the vhci_hcd driver is locally leaking a socket pointer address via the /sys/devices/platform/vhci_hcd/status file (world-readable) and in debug output when "usbip --debug port" is run. Fix it to not leak. The socket pointer address is not used at the moment and it was made visible as a convenient way to find IP address from socket pointer address by looking up /proc/net/{tcp,tcp6}. As this opens a security hole, the fix replaces socket pointer address with sockfd. Reported-by: Secunia Research Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.h | 1 + drivers/usb/usbip/vhci_sysfs.c | 25 +++++++++++++++---------- tools/usb/usbip/libsrc/vhci_driver.c | 8 ++++---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index 9f490375ac92..f0b955f8504e 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -271,6 +271,7 @@ struct usbip_device { /* lock for status */ spinlock_t lock; + int sockfd; struct socket *tcp_socket; struct task_struct *tcp_rx; diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index b96e5b189269..c287ccc78fde 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -49,13 +49,17 @@ static ssize_t status_show_vhci(int pdev_nr, char *out) /* * output example: - * port sta spd dev socket local_busid - * 0000 004 000 00000000 c5a7bb80 1-2.3 - * 0001 004 000 00000000 d8cee980 2-3.4 + * port sta spd dev sockfd local_busid + * 0000 004 000 00000000 000003 1-2.3 + * 0001 004 000 00000000 000004 2-3.4 * - * IP address can be retrieved from a socket pointer address by looking - * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a - * port number and its peer IP address. + * Output includes socket fd instead of socket pointer address to + * avoid leaking kernel memory address in: + * /sys/devices/platform/vhci_hcd.0/status and in debug output. + * The socket pointer address is not used at the moment and it was + * made visible as a convenient way to find IP address from socket + * pointer address by looking up /proc/net/{tcp,tcp6}. As this opens + * a security hole, the change is made to use sockfd instead. */ for (i = 0; i < VHCI_HC_PORTS; i++) { struct vhci_device *vdev = &vhci->vdev[i]; @@ -68,13 +72,13 @@ static ssize_t status_show_vhci(int pdev_nr, char *out) if (vdev->ud.status == VDEV_ST_USED) { out += sprintf(out, "%03u %08x ", vdev->speed, vdev->devid); - out += sprintf(out, "%16p %s", - vdev->ud.tcp_socket, + out += sprintf(out, "%06u %s", + vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { out += sprintf(out, "000 00000000 "); - out += sprintf(out, "0000000000000000 0-0"); + out += sprintf(out, "000000 0-0"); } out += sprintf(out, "\n"); @@ -125,7 +129,7 @@ static ssize_t status_show(struct device *dev, int pdev_nr; out += sprintf(out, - "port sta spd dev socket local_busid\n"); + "port sta spd dev sockfd local_busid\n"); pdev_nr = status_name_to_id(attr->attr.name); if (pdev_nr < 0) @@ -324,6 +328,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr, vdev->devid = devid; vdev->speed = speed; + vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; vdev->ud.status = VDEV_ST_NOTASSIGNED; diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index ad9204773533..1274f326242c 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -55,12 +55,12 @@ static int parse_status(const char *value) while (*c != '\0') { int port, status, speed, devid; - unsigned long socket; + int sockfd; char lbusid[SYSFS_BUS_ID_SIZE]; - ret = sscanf(c, "%d %d %d %x %lx %31s\n", + ret = sscanf(c, "%d %d %d %x %u %31s\n", &port, &status, &speed, - &devid, &socket, lbusid); + &devid, &sockfd, lbusid); if (ret < 5) { dbg("sscanf failed: %d", ret); @@ -69,7 +69,7 @@ static int parse_status(const char *value) dbg("port %d status %d speed %d devid %x", port, status, speed, devid); - dbg("socket %lx lbusid %s", socket, lbusid); + dbg("sockfd %u lbusid %s", sockfd, lbusid); /* if a device is connected, look at it */ From 853c39f239eb89cfd91ee03257c624aaac0575c6 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Mon, 27 Feb 2017 10:31:04 +0200 Subject: [PATCH 153/705] usbip: Fix implicit fallthrough warning commit cfd6ed4537a9e938fa76facecd4b9cd65b6d1563 upstream. GCC 7 now warns when switch statements fall through implicitly, and with -Werror enabled in configure.ac, that makes these tools unbuildable. We fix this by notifying the compiler that this particular case statement is meant to fall through. Reviewed-by: Peter Senna Tschudin Signed-off-by: Jonathan Dieter Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c index d7599d943529..73d8eee8130b 100644 --- a/tools/usb/usbip/src/usbip.c +++ b/tools/usb/usbip/src/usbip.c @@ -176,6 +176,8 @@ int main(int argc, char *argv[]) break; case '?': printf("usbip: invalid option\n"); + /* Terminate after printing error */ + /* FALLTHRU */ default: usbip_usage(); goto out; From 69e78e7214e39c084fcafdfb83b4d13d21b0008b Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Mon, 27 Feb 2017 10:31:03 +0200 Subject: [PATCH 154/705] usbip: Fix potential format overflow in userspace tools commit e5dfa3f902b9a642ae8c6997d57d7c41e384a90b upstream. The usbip userspace tools call sprintf()/snprintf() and don't check for the return value which can lead the paths to overflow, truncating the final file in the path. More urgently, GCC 7 now warns that these aren't checked with -Wformat-overflow, and with -Werror enabled in configure.ac, that makes these tools unbuildable. This patch fixes these problems by replacing sprintf() with snprintf() in one place and adding checks for the return value of snprintf(). Reviewed-by: Peter Senna Tschudin Signed-off-by: Jonathan Dieter Acked-by: Shuah Khan Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/usbip_common.c | 9 ++++++- tools/usb/usbip/libsrc/usbip_host_common.c | 28 ++++++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c index ac73710473de..1517a232ab18 100644 --- a/tools/usb/usbip/libsrc/usbip_common.c +++ b/tools/usb/usbip/libsrc/usbip_common.c @@ -215,9 +215,16 @@ int read_usb_interface(struct usbip_usb_device *udev, int i, struct usbip_usb_interface *uinf) { char busid[SYSFS_BUS_ID_SIZE]; + int size; struct udev_device *sif; - sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i); + size = snprintf(busid, sizeof(busid), "%s:%d.%d", + udev->busid, udev->bConfigurationValue, i); + if (size < 0 || (unsigned int)size >= sizeof(busid)) { + err("busid length %i >= %lu or < 0", size, + (long unsigned)sizeof(busid)); + return -1; + } sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid); if (!sif) { diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index 9d415228883d..6ff7b601f854 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -40,13 +40,20 @@ struct udev *udev_context; static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) { char status_attr_path[SYSFS_PATH_MAX]; + int size; int fd; int length; char status; int value = 0; - snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status", - udev->path); + size = snprintf(status_attr_path, sizeof(status_attr_path), + "%s/usbip_status", udev->path); + if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) { + err("usbip_status path length %i >= %lu or < 0", size, + (long unsigned)sizeof(status_attr_path)); + return -1; + } + fd = open(status_attr_path, O_RDONLY); if (fd < 0) { @@ -218,6 +225,7 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) { char attr_name[] = "usbip_sockfd"; char sockfd_attr_path[SYSFS_PATH_MAX]; + int size; char sockfd_buff[30]; int ret; @@ -237,10 +245,20 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) } /* only the first interface is true */ - snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", - edev->udev.path, attr_name); + size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", + edev->udev.path, attr_name); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) { + err("exported device path length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_attr_path)); + return -1; + } - snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) { + err("socket length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_buff)); + return -1; + } ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff, strlen(sockfd_buff)); From 40bf2c0c1c9ec9c3a17afac43fcd18b39759defd Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 16 Jan 2018 19:30:14 +0100 Subject: [PATCH 155/705] can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once commit 8cb68751c115d176ec851ca56ecfbb411568c9e8 upstream. If an invalid CAN frame is received, from a driver or from a tun interface, a Kernel warning is generated. This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a kernel, bootet with panic_on_warn, does not panic. A printk seems to be more appropriate here. Reported-by: syzbot+4386709c0c1284dca827@syzkaller.appspotmail.com Suggested-by: Dmitry Vyukov Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Signed-off-by: Greg Kroah-Hartman --- net/can/af_can.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 5488e4a6ccd0..6ff7df79e006 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -722,13 +722,12 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!net_eq(dev_net(dev), &init_net))) goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN, - "PF_CAN: dropped non conform CAN skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); goto drop; + } can_receive(skb, dev); return NET_RX_SUCCESS; From 41e4aa17bc02430db764b748e990bdc392347f0d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 16 Jan 2018 19:30:14 +0100 Subject: [PATCH 156/705] can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once commit d4689846881d160a4d12a514e991a740bcb5d65a upstream. If an invalid CANFD frame is received, from a driver or from a tun interface, a Kernel warning is generated. This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a kernel, bootet with panic_on_warn, does not panic. A printk seems to be more appropriate here. Reported-by: syzbot+e3b775f40babeff6e68b@syzkaller.appspotmail.com Suggested-by: Dmitry Vyukov Acked-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Signed-off-by: Greg Kroah-Hartman --- net/can/af_can.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 6ff7df79e006..ac1552d8b4ad 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -745,13 +745,12 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely(!net_eq(dev_net(dev), &init_net))) goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN, - "PF_CAN: dropped non conform CAN FD skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); goto drop; + } can_receive(skb, dev); return NET_RX_SUCCESS; From 45ee9d5e97a4c6814a166c4ddf5c7c3764084270 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 4 Jan 2018 18:24:33 +0000 Subject: [PATCH 157/705] KVM: arm/arm64: Check pagesize when allocating a hugepage at Stage 2 commit c507babf10ead4d5c8cca704539b170752a8ac84 upstream. KVM only supports PMD hugepages at stage 2 but doesn't actually check that the provided hugepage memory pagesize is PMD_SIZE before populating stage 2 entries. In cases where the backing hugepage size is smaller than PMD_SIZE (such as when using contiguous hugepages), KVM can end up creating stage 2 mappings that extend beyond the supplied memory. Fix this by checking for the pagesize of userspace vma before creating PMD hugepage at stage 2. Fixes: 66b3923a1a0f77a ("arm64: hugetlb: add support for PTE contiguous bit") Signed-off-by: Punit Agrawal Cc: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2206e0e00934..2a35c1963f6d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma) && !logging_active) { + if (vma_kernel_pagesize(vma) && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { From 318e17d09cbc40593ce42194ece3ecd1a464cc5e Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Tue, 25 Apr 2017 16:44:03 -0500 Subject: [PATCH 158/705] Prevent timer value 0 for MWAITX commit 88d879d29f9cc0de2d930b584285638cdada6625 upstream. Newer hardware has uncovered a bug in the software implementation of using MWAITX for the delay function. A value of 0 for the timer is meant to indicate that a timeout will not be used to exit MWAITX. On newer hardware this can result in MWAITX never returning, resulting in NMI soft lockup messages being printed. On older hardware, some of the other conditions under which MWAITX can exit masked this issue. The AMD APM does not currently document this and will be updated. Please refer to http://marc.info/?l=kvm&m=148950623231140 for information regarding NMI soft lockup messages on an AMD Ryzen 1800X. This has been root-caused as a 0 passed to MWAITX causing it to wait indefinitely. This change has the added benefit of avoiding the unnecessary setup of MONITORX/MWAITX when the delay value is zero. Signed-off-by: Janakarajan Natarajan Link: http://lkml.kernel.org/r/1493156643-29366-1-git-send-email-Janakarajan.Natarajan@amd.com Signed-off-by: Thomas Gleixner Signed-off-by: Davidlohr Bueso Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/delay.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620b..9758524ee99f 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -93,6 +93,13 @@ static void delay_mwaitx(unsigned long __loops) { u64 start, end, delay, loops = __loops; + /* + * Timer value of 0 causes MWAITX to wait indefinitely, unless there + * is a store on the memory monitored by MONITORX. + */ + if (loops == 0) + return; + start = rdtsc_ordered(); for (;;) { From f5aaa5a2836d86e3b6559200422c153a4dfb6d66 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 28 Oct 2016 09:45:28 +0100 Subject: [PATCH 159/705] drivers: base: cacheinfo: fix x86 with CONFIG_OF enabled commit fac51482577d5e05bbb0efa8d602a3c2111098bf upstream. With CONFIG_OF enabled on x86, we get the following error on boot: " Failed to find cpu0 device node Unable to detect cache hierarchy from DT for CPU 0 " and the cacheinfo fails to get populated in the corresponding sysfs entries. This is because cache_setup_of_node looks for of_node for setting up the shared cpu_map without checking that it's already populated in the architecture specific callback. In order to indicate that the shared cpu_map is already populated, this patch introduces a boolean `cpu_map_populated` in struct cpu_cacheinfo that can be used by the generic code to skip cache_shared_cpu_map_setup. This patch also sets that boolean for x86. Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 ++ drivers/base/cacheinfo.c | 3 +++ include/linux/cacheinfo.h | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index de6626c18e42..be6337156502 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, &id4_regs); __cache_cpumap_setup(cpu, idx, &id4_regs); } + this_cpu_ci->cpu_map_populated = true; + return 0; } diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e9fd32e91668..ecde8957835a 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -106,6 +106,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) unsigned int index; int ret; + if (this_cpu_ci->cpu_map_populated) + return 0; + ret = cache_setup_of_node(cpu); if (ret) return ret; diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2189935075b4..a951fd10aaaa 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -71,6 +71,7 @@ struct cpu_cacheinfo { struct cacheinfo *info_list; unsigned int num_levels; unsigned int num_leaves; + bool cpu_map_populated; }; /* From 1d8c402e0c46ce630746e081c904068af455b1e5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 28 Oct 2016 09:45:29 +0100 Subject: [PATCH 160/705] drivers: base: cacheinfo: fix boot error message when acpi is enabled commit 55877ef45fbd7f975d078426866b7d1a2435dcc3 upstream. ARM64 enables both CONFIG_OF and CONFIG_ACPI and the firmware can pass both ACPI tables and the device tree. Based on the kernel parameter, one of the two will be chosen. If acpi is enabled, then device tree is not unflattened. Currently ARM64 platforms report: " Failed to find cpu0 device node Unable to detect cache hierarchy from DT for CPU 0 " which is incorrect when booting with ACPI. Also latest ACPI v6.1 has no support for cache properties/hierarchy. This patch adds check for unflattened device tree and also returns as "not supported" if ACPI is runtime enabled. It also removes the reference to DT from the error message as the cache hierarchy can be detected from the firmware(OF/DT/ACPI) Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index ecde8957835a..70e13cf06ed0 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -16,6 +16,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ +#include #include #include #include @@ -104,12 +105,16 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; unsigned int index; - int ret; + int ret = 0; if (this_cpu_ci->cpu_map_populated) return 0; - ret = cache_setup_of_node(cpu); + if (of_have_populated_dt()) + ret = cache_setup_of_node(cpu); + else if (!acpi_disabled) + /* No cache property/hierarchy support yet in ACPI */ + ret = -ENOTSUPP; if (ret) return ret; @@ -206,8 +211,7 @@ static int detect_cache_attributes(unsigned int cpu) */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { - pr_warn("Unable to detect cache hierarchy from DT for CPU %d\n", - cpu); + pr_warn("Unable to detect cache hierarchy for CPU %d\n", cpu); goto free_ci; } return 0; From c57664bd12997742da5e12556e328e1ec0b5b654 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:51 -0700 Subject: [PATCH 161/705] mm/mmap.c: do not blow on PROT_NONE MAP_FIXED holes in the stack commit 561b5e0709e4a248c67d024d4d94b6e31e3edf2f upstream. Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") has introduced a regression in some rust and Java environments which are trying to implement their own stack guard page. They are punching a new MAP_FIXED mapping inside the existing stack Vma. This will confuse expand_{downwards,upwards} into thinking that the stack expansion would in fact get us too close to an existing non-stack vma which is a correct behavior wrt safety. It is a real regression on the other hand. Let's work around the problem by considering PROT_NONE mapping as a part of the stack. This is a gros hack but overflowing to such a mapping would trap anyway an we only can hope that usespace knows what it is doing and handle it propely. Fixes: 1be7107fbe18 ("mm: larger stack guard gap, between vmas") Link: http://lkml.kernel.org/r/20170705182849.GA18027@dhcp22.suse.cz Signed-off-by: Michal Hocko Debugged-by: Vlastimil Babka Cc: Ben Hutchings Cc: Willy Tarreau Cc: Oleg Nesterov Cc: Rik van Riel Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 5b48adb4aa56..45ac5b973459 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2240,7 +2240,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) gap_addr = TASK_SIZE; next = vma->vm_next; - if (next && next->vm_start < gap_addr) { + if (next && next->vm_start < gap_addr && + (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(next->vm_flags & VM_GROWSUP)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ @@ -2324,7 +2325,8 @@ int expand_downwards(struct vm_area_struct *vma, if (gap_addr > address) return -ENOMEM; prev = vma->vm_prev; - if (prev && prev->vm_end > gap_addr) { + if (prev && prev->vm_end > gap_addr && + (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(prev->vm_flags & VM_GROWSDOWN)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ From bc0e2174b092638f1146d792a8be71ab90a7e56d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 12 May 2017 15:46:26 -0700 Subject: [PATCH 162/705] hwpoison, memcg: forcibly uncharge LRU pages commit 18365225f0440d09708ad9daade2ec11275c3df9 upstream. Laurent Dufour has noticed that hwpoinsoned pages are kept charged. In his particular case he has hit a bad_page("page still charged to cgroup") when onlining a hwpoison page. While this looks like something that shouldn't happen in the first place because onlining hwpages and returning them to the page allocator makes only little sense it shows a real problem. hwpoison pages do not get freed usually so we do not uncharge them (at least not since commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API")). Each charge pins memcg (since e8ea14cc6ead ("mm: memcontrol: take a css reference for each charged page")) as well and so the mem_cgroup and the associated state will never go away. Fix this leak by forcibly uncharging a LRU hwpoisoned page in delete_from_lru_cache(). We also have to tweak uncharge_list because it cannot rely on zero ref count for these pages. [akpm@linux-foundation.org: coding-style fixes] Fixes: 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") Link: http://lkml.kernel.org/r/20170502185507.GB19165@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Laurent Dufour Tested-by: Laurent Dufour Reviewed-by: Balbir Singh Reviewed-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 2 +- mm/memory-failure.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2a800c4a39bd..50088150fc17 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5531,7 +5531,7 @@ static void uncharge_list(struct list_head *page_list) next = page->lru.next; VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); if (!page->mem_cgroup) continue; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ce7d416edab7..5aa71a82ca73 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -535,6 +535,13 @@ static int delete_from_lru_cache(struct page *p) */ ClearPageActive(p); ClearPageUnevictable(p); + + /* + * Poisoned page might never drop its ref count to 0 so we have + * to uncharge it manually from its memcg. + */ + mem_cgroup_uncharge(p); + /* * drop the page count elevated by isolate_lru_page() */ From 714c19ef57a5271c14ab6d1b5878f5662a0b3034 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 10 Jul 2017 15:49:44 -0700 Subject: [PATCH 163/705] cma: fix calculation of aligned offset commit e048cb32f69038aa1c8f11e5c1b331be4181659d upstream. The align_offset parameter is used by bitmap_find_next_zero_area_off() to represent the offset of map's base from the previous alignment boundary; the function ensures that the returned index, plus the align_offset, honors the specified align_mask. The logic introduced by commit b5be83e308f7 ("mm: cma: align to physical address, not CMA region position") has the cma driver calculate the offset to the *next* alignment boundary. In most cases, the base alignment is greater than that specified when making allocations, resulting in a zero offset whether we align up or down. In the example given with the commit, the base alignment (8MB) was half the requested alignment (16MB) so the math also happened to work since the offset is 8MB in both directions. However, when requesting allocations with an alignment greater than twice that of the base, the returned index would not be correctly aligned. Also, the align_order arguments of cma_bitmap_aligned_mask() and cma_bitmap_aligned_offset() should not be negative so the argument type was made unsigned. Fixes: b5be83e308f7 ("mm: cma: align to physical address, not CMA region position") Link: http://lkml.kernel.org/r/20170628170742.2895-1-opendmb@gmail.com Signed-off-by: Angus Clark Signed-off-by: Doug Berger Acked-by: Gregory Fong Cc: Doug Berger Cc: Angus Clark Cc: Laura Abbott Cc: Vlastimil Babka Cc: Greg Kroah-Hartman Cc: Lucas Stach Cc: Catalin Marinas Cc: Shiraz Hashim Cc: Jaewon Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- mm/cma.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index c960459eda7e..397687fc51f9 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct cma *cma) } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, - int align_order) + unsigned int align_order) { if (align_order <= cma->order_per_bit) return 0; @@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, } /* - * Find a PFN aligned to the specified order and return an offset represented in - * order_per_bits. + * Find the offset of the base PFN from the specified align_order. + * The value returned is represented in order_per_bits. */ static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, - int align_order) + unsigned int align_order) { - if (align_order <= cma->order_per_bit) - return 0; - - return (ALIGN(cma->base_pfn, (1UL << align_order)) - - cma->base_pfn) >> cma->order_per_bit; + return (cma->base_pfn & ((1UL << align_order) - 1)) + >> cma->order_per_bit; } static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, From 685cce58f1c2e4fb5dbc891edf3e1bbca217ed3e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 15 Nov 2017 17:38:30 -0800 Subject: [PATCH 164/705] mm, page_alloc: fix potential false positive in __zone_watermark_ok commit b050e3769c6b4013bb937e879fc43bf1847ee819 upstream. Since commit 97a16fc82a7c ("mm, page_alloc: only enforce watermarks for order-0 allocations"), __zone_watermark_ok() check for high-order allocations will shortcut per-migratetype free list checks for ALLOC_HARDER allocations, and return true as long as there's free page of any migratetype. The intention is that ALLOC_HARDER can allocate from MIGRATE_HIGHATOMIC free lists, while normal allocations can't. However, as a side effect, the watermark check will then also return true when there are pages only on the MIGRATE_ISOLATE list, or (prior to CMA conversion to ZONE_MOVABLE) on the MIGRATE_CMA list. Since the allocation cannot actually obtain isolated pages, and might not be able to obtain CMA pages, this can result in a false positive. The condition should be rare and perhaps the outcome is not a fatal one. Still, it's better if the watermark check is correct. There also shouldn't be a performance tradeoff here. Link: http://lkml.kernel.org/r/20171102125001.23708-1-vbabka@suse.cz Fixes: 97a16fc82a7c ("mm, page_alloc: only enforce watermarks for order-0 allocations") Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Joonsoo Kim Cc: Rik van Riel Cc: David Rientjes Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fbc38888252b..546713b3f762 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2821,9 +2821,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, if (!area->nr_free) continue; - if (alloc_harder) - return true; - for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { if (!list_empty(&area->free_list[mt])) return true; @@ -2835,6 +2832,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif + if (alloc_harder && + !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + return true; } return false; } From 542cde0e3cc27bd4d6cbfa596d9278d3c97bc193 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 14 Dec 2016 15:06:07 -0800 Subject: [PATCH 165/705] ipc: msg, make msgrcv work with LONG_MIN commit 999898355e08ae3b92dfd0a08db706e0c6703d30 upstream. When LONG_MIN is passed to msgrcv, one would expect to recieve any message. But convert_mode does *msgtyp = -*msgtyp and -LONG_MIN is undefined. In particular, with my gcc -LONG_MIN produces -LONG_MIN again. So handle this case properly by assigning LONG_MAX to *msgtyp if LONG_MIN was specified as msgtyp to msgrcv. This code: long msg[] = { 100, 200 }; int m = msgget(IPC_PRIVATE, IPC_CREAT | 0644); msgsnd(m, &msg, sizeof(msg), 0); msgrcv(m, &msg, sizeof(msg), LONG_MIN, 0); produces currently nothing: msgget(IPC_PRIVATE, IPC_CREAT|0644) = 65538 msgsnd(65538, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, 0) = 0 msgrcv(65538, ... Except a UBSAN warning: UBSAN: Undefined behaviour in ipc/msg.c:745:13 negation of -9223372036854775808 cannot be represented in type 'long int': With the patch, I see what I expect: msgget(IPC_PRIVATE, IPC_CREAT|0644) = 0 msgsnd(0, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, 0) = 0 msgrcv(0, {100, "\310\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16, -9223372036854775808, 0) = 16 Link: http://lkml.kernel.org/r/20161024082633.10148-1-jslaby@suse.cz Signed-off-by: Jiri Slaby Cc: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ipc/msg.c b/ipc/msg.c index e12307d0c920..ff10d43b5184 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -763,7 +763,10 @@ static inline int convert_mode(long *msgtyp, int msgflg) if (*msgtyp == 0) return SEARCH_ANY; if (*msgtyp < 0) { - *msgtyp = -*msgtyp; + if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */ + *msgtyp = LONG_MAX; + else + *msgtyp = -*msgtyp; return SEARCH_LESSEQUAL; } if (msgflg & MSG_EXCEPT) From 3a53accd9c397f836858defa475720a65b5dd662 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Dec 2016 02:27:31 +0100 Subject: [PATCH 166/705] ACPI / scan: Prefer devices without _HID/_CID for _ADR matching commit c2a6bbaf0c5f90463a7011a295bbdb7e33c80b51 upstream. The way acpi_find_child_device() works currently is that, if there are two (or more) devices with the same _ADR value in the same namespace scope (which is not specifically allowed by the spec and the OS behavior in that case is not defined), the first one of them found to be present (with the help of _STA) will be returned. This covers the majority of cases, but is not sufficient if some of the devices in question have a _HID (or _CID) returning some valid ACPI/PNP device IDs (which is disallowed by the spec) and the ASL writers' expectation appears to be that the OS will match devices without a valid ACPI/PNP device ID against a given bus address first. To cover this special case as well, modify find_child_checks() to prefer devices without ACPI/PNP device IDs over devices that have them. Suggested-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki Tested-by: Hans de Goede Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 73c9c7fa9001..f06317d6fc38 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; /* - * If the device has a _HID (or _CID) returning a valid ACPI/PNP - * device ID, it is better to make it look less attractive here, so that - * the other device with the same _ADR value (that may not have a valid - * device ID) can be matched going forward. [This means a second spec - * violation in a row, so whatever we do here is best effort anyway.] + * If the device has a _HID returning a valid ACPI/PNP device ID, it is + * better to make it look less attractive here, so that the other device + * with the same _ADR value (that may not have a valid device ID) can be + * matched going forward. [This means a second spec violation in a row, + * so whatever we do here is best effort anyway.] */ - return sta_present && list_empty(&adev->pnp.ids) ? + return sta_present && !adev->pnp.type.platform_id ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } From 2915f16bdce204621695e7a0dfcd5f73b120cccb Mon Sep 17 00:00:00 2001 From: Seunghun Han Date: Wed, 26 Apr 2017 16:18:08 +0800 Subject: [PATCH 167/705] ACPICA: Namespace: fix operand cache leak commit 3b2d69114fefa474fca542e51119036dceb4aa6f upstream. ACPICA commit a23325b2e583556eae88ed3f764e457786bf4df6 I found some ACPI operand cache leaks in ACPI early abort cases. Boot log of ACPI operand cache leak is as follows: >[ 0.174332] ACPI: Added _OSI(Module Device) >[ 0.175504] ACPI: Added _OSI(Processor Device) >[ 0.176010] ACPI: Added _OSI(3.0 _SCP Extensions) >[ 0.177032] ACPI: Added _OSI(Processor Aggregator Device) >[ 0.178284] ACPI: SCI (IRQ16705) allocation failed >[ 0.179352] ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20160930/evevent-131) >[ 0.180008] ACPI: Unable to start the ACPI Interpreter >[ 0.181125] ACPI Error: Could not remove SCI handler (20160930/evmisc-281) >[ 0.184068] kmem_cache_destroy Acpi-Operand: Slab cache still has objects >[ 0.185358] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-rc3 #2 >[ 0.186820] Hardware name: innotek gmb_h virtual_box/virtual_box, BIOS virtual_box 12/01/2006 >[ 0.188000] Call Trace: >[ 0.188000] ? dump_stack+0x5c/0x7d >[ 0.188000] ? kmem_cache_destroy+0x224/0x230 >[ 0.188000] ? acpi_sleep_proc_init+0x22/0x22 >[ 0.188000] ? acpi_os_delete_cache+0xa/0xd >[ 0.188000] ? acpi_ut_delete_caches+0x3f/0x7b >[ 0.188000] ? acpi_terminate+0x5/0xf >[ 0.188000] ? acpi_init+0x288/0x32e >[ 0.188000] ? __class_create+0x4c/0x80 >[ 0.188000] ? video_setup+0x7a/0x7a >[ 0.188000] ? do_one_initcall+0x4e/0x1b0 >[ 0.188000] ? kernel_init_freeable+0x194/0x21a >[ 0.188000] ? rest_init+0x80/0x80 >[ 0.188000] ? kernel_init+0xa/0x100 >[ 0.188000] ? ret_from_fork+0x25/0x30 When early abort is occurred due to invalid ACPI information, Linux kernel terminates ACPI by calling acpi_terminate() function. The function calls acpi_ns_terminate() function to delete namespace data and ACPI operand cache (acpi_gbl_module_code_list). But the deletion code in acpi_ns_terminate() function is wrapped in ACPI_EXEC_APP definition, therefore the code is only executed when the definition exists. If the define doesn't exist, ACPI operand cache (acpi_gbl_module_code_list) is leaked, and stack dump is shown in kernel log. This causes a security threat because the old kernel (<= 4.9) shows memory locations of kernel functions in stack dump, therefore kernel ASLR can be neutralized. To fix ACPI operand leak for enhancing security, I made a patch which removes the ACPI_EXEC_APP define in acpi_ns_terminate() function for executing the deletion code unconditionally. Link: https://github.com/acpica/acpica/commit/a23325b2 Signed-off-by: Seunghun Han Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Acked-by: Lee, Chun-Yi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/nsutils.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 691814dfed31..943702dd9517 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle) void acpi_ns_terminate(void) { acpi_status status; + union acpi_operand_object *prev; + union acpi_operand_object *next; ACPI_FUNCTION_TRACE(ns_terminate); -#ifdef ACPI_EXEC_APP - { - union acpi_operand_object *prev; - union acpi_operand_object *next; + /* Delete any module-level code blocks */ - /* Delete any module-level code blocks */ - - next = acpi_gbl_module_code_list; - while (next) { - prev = next; - next = next->method.mutex; - prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */ - acpi_ut_remove_reference(prev); - } + next = acpi_gbl_module_code_list; + while (next) { + prev = next; + next = next->method.mutex; + prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */ + acpi_ut_remove_reference(prev); } -#endif /* * Free the entire namespace -- all nodes and all objects From 2c3184ea80322347287bc7e57f782d77f478e73c Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sun, 3 Dec 2017 12:12:45 -0800 Subject: [PATCH 168/705] netfilter: nfnetlink_cthelper: Add missing permission checks commit 4b380c42f7d00a395feede754f0bc2292eebe6e5 upstream. The capability check in nfnetlink_rcv() verifies that the caller has CAP_NET_ADMIN in the namespace that "owns" the netlink socket. However, nfnl_cthelper_list is shared by all net namespaces on the system. An unprivileged user can create user and net namespaces in which he holds CAP_NET_ADMIN to bypass the netlink_net_capable() check: $ nfct helper list nfct v1.4.4: netlink error: Operation not permitted $ vpnns -- nfct helper list { .name = ftp, .queuenum = 0, .l3protonum = 2, .l4protonum = 6, .priv_data_len = 24, .status = enabled, }; Add capable() checks in nfnetlink_cthelper, as this is cleaner than trying to generalize the solution. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso Acked-by: Michal Kubecek Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 28d065394c09..3f499126727c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -392,6 +393,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth; int ret = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -595,6 +599,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth; bool tuple_set = false; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_cthelper_dump_table, @@ -661,6 +668,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth, *n; int j = 0, ret; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); From 898eeca02a55e354c42a7aa5cdfebf16c3742f44 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Tue, 5 Dec 2017 15:42:41 -0800 Subject: [PATCH 169/705] netfilter: xt_osf: Add missing permission checks commit 916a27901de01446bcf57ecca4783f6cff493309 upstream. The capability check in nfnetlink_rcv() verifies that the caller has CAP_NET_ADMIN in the namespace that "owns" the netlink socket. However, xt_osf_fingers is shared by all net namespaces on the system. An unprivileged user can create user and net namespaces in which he holds CAP_NET_ADMIN to bypass the netlink_net_capable() check: vpnns -- nfnl_osf -f /tmp/pf.os vpnns -- nfnl_osf -f /tmp/pf.os -d These non-root operations successfully modify the systemwide OS fingerprint list. Add new capable() checks so that they can't. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso Acked-by: Michal Kubecek Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_osf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 2455b69b5810..b589a62e68a2 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl, struct xt_osf_finger *kf = NULL, *sf; int err = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; @@ -113,6 +117,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, struct xt_osf_finger *sf; int err = -ENOENT; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; From b7d25282b75ea7da56ca7b1a11f7a9c4970fc5e0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 22 Jun 2017 16:47:34 -0400 Subject: [PATCH 170/705] reiserfs: fix race in prealloc discard commit 08db141b5313ac2f64b844fb5725b8d81744b417 upstream. The main loop in __discard_prealloc is protected by the reiserfs write lock which is dropped across schedules like the BKL it replaced. The problem is that it checks the value, calls a routine that schedules, and then adjusts the state. As a result, two threads that are calling reiserfs_prealloc_discard at the same time can race when one calls reiserfs_free_prealloc_block, the lock is dropped, and the other calls reiserfs_free_prealloc_block with the same block number. In the right circumstances, it can cause the prealloc count to go negative. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/bitmap.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index dc198bc64c61..73705d4bb069 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -513,9 +513,17 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th, "inode has negative prealloc blocks count."); #endif while (ei->i_prealloc_count > 0) { - reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); - ei->i_prealloc_block++; + b_blocknr_t block_to_free; + + /* + * reiserfs_free_prealloc_block can drop the write lock, + * which could allow another caller to free the same block. + * We can protect against it by modifying the prealloc + * state before calling it. + */ + block_to_free = ei->i_prealloc_block++; ei->i_prealloc_count--; + reiserfs_free_prealloc_block(th, inode, block_to_free); dirty = 1; } if (dirty) From 0ccfbd4d6f02ff010dcc59e439f19864ad3a4efa Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 22 Jun 2017 16:35:04 -0400 Subject: [PATCH 171/705] reiserfs: don't preallocate blocks for extended attributes commit 54930dfeb46e978b447af0fb8ab4e181c1bf9d7a upstream. Most extended attributes will fit in a single block. More importantly, we drop the reference to the inode while holding the transaction open so the preallocated blocks aren't released. As a result, the inode may be evicted before it's removed from the transaction's prealloc list which can cause memory corruption. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 73705d4bb069..edc8ef78b63f 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1136,7 +1136,7 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) hint->prealloc_size = 0; if (!hint->formatted_node && hint->preallocate) { - if (S_ISREG(hint->inode->i_mode) + if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode) && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options. preallocmin * hint->inode->i_sb->s_blocksize) From 7b50205cf8b9c7bd10572c97f7ca9fecfd9aec7b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 13 Jun 2017 13:35:51 +0200 Subject: [PATCH 172/705] fs/fcntl: f_setown, avoid undefined behaviour commit fc3dc67471461c0efcb1ed22fb7595121d65fad9 upstream. fcntl(0, F_SETOWN, 0x80000000) triggers: UBSAN: Undefined behaviour in fs/fcntl.c:118:7 negation of -2147483648 cannot be represented in type 'int': CPU: 1 PID: 18261 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1 ... Call Trace: ... [] ? f_setown+0x1d8/0x200 [] ? SyS_fcntl+0x999/0xf30 [] ? entry_SYSCALL_64_fastpath+0x23/0xc1 Fix that by checking the arg parameter properly (against INT_MAX) before "who = -who". And return immediatelly with -EINVAL in case it is wrong. Note that according to POSIX we can return EINVAL: http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html [EINVAL] The cmd argument is F_SETOWN and the value of the argument is not valid as a process or process group identifier. [v2] returns an error, v1 used to fail silently [v3] implement proper check for the bad value INT_MIN Signed-off-by: Jiri Slaby Cc: Jeff Layton Cc: "J. Bruce Fields" Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/fcntl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fcntl.c b/fs/fcntl.c index 1493ceb0477d..ec03cf620fd7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -114,6 +114,10 @@ void f_setown(struct file *filp, unsigned long arg, int force) int who = arg; type = PIDTYPE_PID; if (who < 0) { + /* avoid overflow below */ + if (who == INT_MIN) + return; + type = PIDTYPE_PGID; who = -who; } From c41bb027ed63db9e7a6b2c4c3c5e2e4df1ccb8cf Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 9 Oct 2017 13:33:19 +0200 Subject: [PATCH 173/705] scsi: libiscsi: fix shifting of DID_REQUEUE host byte commit eef9ffdf9cd39b2986367bc8395e2772bc1284ba upstream. The SCSI host byte should be shifted left by 16 in order to have scsi_decide_disposition() do the right thing (.i.e. requeue the command). Signed-off-by: Johannes Thumshirn Fixes: 661134ad3765 ("[SCSI] libiscsi, bnx2i: make bound ep check common") Cc: Lee Duncan Cc: Hannes Reinecke Cc: Bart Van Assche Cc: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index a530f08592cd..4abd3fce5ab6 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1727,7 +1727,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { reason = FAILURE_SESSION_IN_RECOVERY; - sc->result = DID_REQUEUE; + sc->result = DID_REQUEUE << 16; goto fault; } From e62b0c661f65e985327f4bc542688630265b0311 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 24 Jan 2018 15:28:17 +0100 Subject: [PATCH 174/705] Revert "module: Add retpoline tag to VERMAGIC" commit 5132ede0fe8092b043dae09a7cc32b8ae7272baa upstream. This reverts commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12. Turns out distros do not want to make retpoline as part of their "ABI", so this patch should not have been merged. Sorry Andi, this was my fault, I suggested it when your original patch was the "correct" way of doing this instead. Reported-by: Jiri Kosina Fixes: 6cfb521ac0d5 ("module: Add retpoline tag to VERMAGIC") Acked-by: Andi Kleen Cc: Thomas Gleixner Cc: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/vermagic.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index a3d04934aa96..6f8fbcf10dfb 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -24,16 +24,10 @@ #ifndef MODULE_ARCH_VERMAGIC #define MODULE_ARCH_VERMAGIC "" #endif -#ifdef RETPOLINE -#define MODULE_VERMAGIC_RETPOLINE "retpoline " -#else -#define MODULE_VERMAGIC_RETPOLINE "" -#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ - MODULE_ARCH_VERMAGIC \ - MODULE_VERMAGIC_RETPOLINE + MODULE_ARCH_VERMAGIC From 19a7db1e2ef38865a704ea4dfd178b02a8026ada Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:51:51 -0700 Subject: [PATCH 175/705] mm: fix 100% CPU kswapd busyloop on unreclaimable nodes commit c73322d098e4b6f5f0f0fa1330bf57e218775539 upstream. Patch series "mm: kswapd spinning on unreclaimable nodes - fixes and cleanups". Jia reported a scenario in which the kswapd of a node indefinitely spins at 100% CPU usage. We have seen similar cases at Facebook. The kernel's current method of judging its ability to reclaim a node (or whether to back off and sleep) is based on the amount of scanned pages in proportion to the amount of reclaimable pages. In Jia's and our scenarios, there are no reclaimable pages in the node, however, and the condition for backing off is never met. Kswapd busyloops in an attempt to restore the watermarks while having nothing to work with. This series reworks the definition of an unreclaimable node based not on scanning but on whether kswapd is able to actually reclaim pages in MAX_RECLAIM_RETRIES (16) consecutive runs. This is the same criteria the page allocator uses for giving up on direct reclaim and invoking the OOM killer. If it cannot free any pages, kswapd will go to sleep and leave further attempts to direct reclaim invocations, which will either make progress and re-enable kswapd, or invoke the OOM killer. Patch #1 fixes the immediate problem Jia reported, the remainder are smaller fixlets, cleanups, and overall phasing out of the old method. Patch #6 is the odd one out. It's a nice cleanup to get_scan_count(), and directly related to #5, but in itself not relevant to the series. If the whole series is too ambitious for 4.11, I would consider the first three patches fixes, the rest cleanups. This patch (of 9): Jia He reports a problem with kswapd spinning at 100% CPU when requesting more hugepages than memory available in the system: $ echo 4000 >/proc/sys/vm/nr_hugepages top - 13:42:59 up 3:37, 1 user, load average: 1.09, 1.03, 1.01 Tasks: 1 total, 1 running, 0 sleeping, 0 stopped, 0 zombie %Cpu(s): 0.0 us, 12.5 sy, 0.0 ni, 85.5 id, 2.0 wa, 0.0 hi, 0.0 si, 0.0 st KiB Mem: 31371520 total, 30915136 used, 456384 free, 320 buffers KiB Swap: 6284224 total, 115712 used, 6168512 free. 48192 cached Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 76 root 20 0 0 0 0 R 100.0 0.000 217:17.29 kswapd3 At that time, there are no reclaimable pages left in the node, but as kswapd fails to restore the high watermarks it refuses to go to sleep. Kswapd needs to back away from nodes that fail to balance. Up until commit 1d82de618ddd ("mm, vmscan: make kswapd reclaim in terms of nodes") kswapd had such a mechanism. It considered zones whose theoretically reclaimable pages it had reclaimed six times over as unreclaimable and backed away from them. This guard was erroneously removed as the patch changed the definition of a balanced node. However, simply restoring this code wouldn't help in the case reported here: there *are* no reclaimable pages that could be scanned until the threshold is met. Kswapd would stay awake anyway. Introduce a new and much simpler way of backing off. If kswapd runs through MAX_RECLAIM_RETRIES (16) cycles without reclaiming a single page, make it back off from the node. This is the same number of shots direct reclaim takes before declaring OOM. Kswapd will go to sleep on that node until a direct reclaimer manages to reclaim some pages, thus proving the node reclaimable again. [hannes@cmpxchg.org: check kswapd failure against the cumulative nr_reclaimed count] Link: http://lkml.kernel.org/r/20170306162410.GB2090@cmpxchg.org [shakeelb@google.com: fix condition for throttle_direct_reclaim] Link: http://lkml.kernel.org/r/20170314183228.20152-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20170228214007.5621-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Shakeel Butt Reported-by: Jia He Tested-by: Jia He Acked-by: Michal Hocko Acked-by: Hillf Danton Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Dmitry Shmidt Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 2 ++ mm/internal.h | 6 ++++++ mm/page_alloc.c | 9 ++------ mm/vmscan.c | 47 ++++++++++++++++++++++++++++-------------- mm/vmstat.c | 2 +- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 490f5a83f947..e3d7754f25f0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -633,6 +633,8 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_classzone_idx; + int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; diff --git a/mm/internal.h b/mm/internal.h index 34a5459e5989..3e2d01694747 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -73,6 +73,12 @@ static inline void set_page_refcounted(struct page *page) extern unsigned long highest_memmap_pfn; +/* + * Maximum number of reclaim retries without progress before the OOM + * killer is consider the only way forward. + */ +#define MAX_RECLAIM_RETRIES 16 + /* * in mm/vmscan.c: */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 546713b3f762..94018ea5f935 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3421,12 +3421,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return false; } -/* - * Maximum number of reclaim retries without any progress before OOM killer - * is consider as the only way to move forward. - */ -#define MAX_RECLAIM_RETRIES 16 - /* * Checks whether it makes sense to retry the reclaim to make a forward progress * for the given allocation request. @@ -4385,7 +4379,8 @@ void show_free_areas(unsigned int filter) K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)), node_page_state(pgdat, NR_PAGES_SCANNED), - !pgdat_reclaimable(pgdat) ? "yes" : "no"); + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? + "yes" : "no"); } for_each_populated_zone(zone) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 30a88b945a44..f118dc23f662 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2606,6 +2606,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); + /* + * Kswapd gives up on balancing particular nodes after too + * many failures to reclaim anything from them and goes to + * sleep. On reclaim progress, reset the failure counter. A + * successful direct reclaim run will revive a dormant kswapd. + */ + if (reclaimable) + pgdat->kswapd_failures = 0; + return reclaimable; } @@ -2680,10 +2689,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) GFP_KERNEL | __GFP_HARDWALL)) continue; - if (sc->priority != DEF_PRIORITY && - !pgdat_reclaimable(zone->zone_pgdat)) - continue; /* Let kswapd poll it */ - /* * If we already have plenty of memory free for * compaction in this zone, don't free any more. @@ -2820,7 +2825,7 @@ retry: return 0; } -static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) +static bool allow_direct_reclaim(pg_data_t *pgdat) { struct zone *zone; unsigned long pfmemalloc_reserve = 0; @@ -2828,6 +2833,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) int i; bool wmark_ok; + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; if (!managed_zone(zone) || @@ -2908,7 +2916,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; - if (pfmemalloc_watermark_ok(pgdat)) + if (allow_direct_reclaim(pgdat)) goto out; break; } @@ -2930,14 +2938,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, */ if (!(gfp_mask & __GFP_FS)) { wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat), HZ); + allow_direct_reclaim(pgdat), HZ); goto check_pending; } /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat)); + allow_direct_reclaim(pgdat)); check_pending: if (fatal_signal_pending(current)) @@ -3116,7 +3124,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) /* * The throttled processes are normally woken up in balance_pgdat() as - * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * soon as allow_direct_reclaim() is true. But there is a potential * race between when kswapd checks the watermarks and a process gets * throttled. There is also a potential race if processes get * throttled, kswapd wakes, a large process exits thereby balancing the @@ -3130,6 +3138,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= classzone_idx; i++) { struct zone *zone = pgdat->node_zones + i; @@ -3216,9 +3228,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) count_vm_event(PAGEOUTRUN); do { + unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; - sc.nr_reclaimed = 0; sc.reclaim_idx = classzone_idx; /* @@ -3297,7 +3309,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && - pfmemalloc_watermark_ok(pgdat)) + allow_direct_reclaim(pgdat)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ @@ -3308,10 +3320,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ - if (raise_priority || !sc.nr_reclaimed) + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); + if (!sc.nr_reclaimed) + pgdat->kswapd_failures++; + out: /* * Return the order kswapd stopped reclaiming at as @@ -3511,6 +3527,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) if (!waitqueue_active(&pgdat->kswapd_wait)) return; + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return; + /* Only wake kswapd if all zones are unbalanced */ for (z = 0; z <= classzone_idx; z++) { zone = pgdat->node_zones + z; @@ -3781,9 +3801,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) return NODE_RECLAIM_FULL; - if (!pgdat_reclaimable(pgdat)) - return NODE_RECLAIM_FULL; - /* * Do not scan if the allocation should not be delayed. */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 6a088df04b29..3863b5d6d598 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1421,7 +1421,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n node_unreclaimable: %u" "\n start_pfn: %lu" "\n node_inactive_ratio: %u", - !pgdat_reclaimable(zone->zone_pgdat), + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, zone->zone_start_pfn, zone->zone_pgdat->inactive_ratio); seq_putc(m, '\n'); From 42f0aba58e00aedc395460e621896532d009c64f Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 19 Jan 2018 09:43:39 -0800 Subject: [PATCH 176/705] Input: trackpoint - force 3 buttons if 0 button is reported commit f5d07b9e98022d50720e38aa936fc11c67868ece upstream. Lenovo introduced trackpoint compatible sticks with minimum PS/2 commands. They supposed to reply with 0x02, 0x03, or 0x04 in response to the "Read Extended ID" command, so we would know not to try certain extended commands. Unfortunately even some trackpoints reporting the original IBM version (0x01 firmware 0x0e) now respond with incorrect data to the "Get Extended Buttons" command: thinkpad_acpi: ThinkPad BIOS R0DET87W (1.87 ), EC unknown thinkpad_acpi: Lenovo ThinkPad E470, model 20H1004SGE psmouse serio2: trackpoint: IBM TrackPoint firmware: 0x0e, buttons: 0/0 Since there are no trackpoints without buttons, let's assume the trackpoint has 3 buttons when we get 0 response to the extended buttons query. Signed-off-by: Aaron Ma Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196253 Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/trackpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 7e2dc5e56632..0b49f29bf0da 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -383,6 +383,9 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) { psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); button_info = 0x33; + } else if (!button_info) { + psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); From d680db722516dbf2d564d2118127270b88b1b663 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 25 Jan 2018 19:39:44 -0500 Subject: [PATCH 177/705] orangefs: fix deadlock; do not write i_size in read_iter commit 6793f1c450b1533a5e9c2493490de771d38b24f9 upstream. After do_readv_writev, the inode cache is invalidated anyway, so i_size will never be read. It will be fetched from the server which will also know about updates from other machines. Fixes deadlock on 32-bit SMP. See https://marc.info/?l=linux-fsdevel&m=151268557427760&w=2 Signed-off-by: Martin Brandenburg Cc: Al Viro Cc: Mike Marshall Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/orangefs/file.c | 7 ++----- fs/orangefs/orangefs-kernel.h | 11 ----------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 02cc6139ec90..5b2cbe567365 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -446,7 +446,7 @@ ssize_t orangefs_inode_read(struct inode *inode, static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - loff_t pos = *(&iocb->ki_pos); + loff_t pos = iocb->ki_pos; ssize_t rc = 0; BUG_ON(iocb->private); @@ -485,9 +485,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite } } - if (file->f_pos > i_size_read(file->f_mapping->host)) - orangefs_i_size_write(file->f_mapping->host, file->f_pos); - rc = generic_write_checks(iocb, iter); if (rc <= 0) { @@ -501,7 +498,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite * pos to the end of the file, so we will wait till now to set * pos... */ - pos = *(&iocb->ki_pos); + pos = iocb->ki_pos; rc = do_readv_writev(ORANGEFS_IO_WRITE, file, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 45dd8f27b2ac..f28381a7cd12 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -570,17 +570,6 @@ do { \ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) -static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) -{ -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_lock(inode); -#endif - i_size_write(inode, i_size); -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_unlock(inode); -#endif -} - static inline void orangefs_set_timeout(struct dentry *dentry) { unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; From 1be7d46e775c2fbec57bbce7190b66fbb7c58d1b Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sun, 20 Aug 2017 13:26:04 +0200 Subject: [PATCH 178/705] um: link vmlinux with -no-pie commit 883354afbc109c57f925ccc19840055193da0cc0 upstream. Debian's gcc defaults to pie. The global Makefile already defines the -fno-pie option. Link UML dynamic kernel image also with -no-pie to fix the build. Signed-off-by: Thomas Meyer Signed-off-by: Richard Weinberger Cc: Bernie Innocenti Signed-off-by: Greg Kroah-Hartman --- arch/um/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 0ca46ededfc7..9c150ccb35d2 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -117,7 +117,7 @@ archheaders: archprepare: include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie) CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ $(call cc-option, -fno-stack-protector,) \ From 9a0be5afbfbb1d14efdc98a6615fc52082243bd1 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 26 Jan 2018 16:23:02 +0000 Subject: [PATCH 179/705] vsyscall: Fix permissions for emulate mode with KAISER/PTI The backport of KAISER to 4.4 turned vsyscall emulate mode into native mode. Add a vsyscall_pgprot variable to hold the correct page protections, like Borislav and Hugh did for 3.2 and 3.18. Cc: Borislav Petkov Cc: Hugh Dickins Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vsyscall/vsyscall_64.c | 7 ++++--- arch/x86/include/asm/vsyscall.h | 1 + arch/x86/mm/kaiser.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 6bb7e92c6d50..0174290b2857 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode = #else EMULATE; #endif +unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL; static int __init vsyscall_setup(char *str) { @@ -336,11 +337,11 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); + if (vsyscall_mode != NATIVE) + vsyscall_pgprot = __PAGE_KERNEL_VVAR; if (vsyscall_mode != NONE) __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + __pgprot(vsyscall_pgprot)); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 4865e10dbb55..9ee85066f407 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,6 +13,7 @@ extern void map_vsyscall(void); */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); extern bool vsyscall_enabled(void); +extern unsigned long vsyscall_pgprot; #else static inline void map_vsyscall(void) {} static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index a8ade08a9bf5..ec678aafa3f8 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -344,7 +344,7 @@ void __init kaiser_init(void) if (vsyscall_enabled()) kaiser_add_user_map_early((void *)VSYSCALL_ADDR, PAGE_SIZE, - __PAGE_KERNEL_VSYSCALL); + vsyscall_pgprot); for_each_possible_cpu(cpu) { void *percpu_vaddr = __per_cpu_user_mapped_start + From 5f6c581bcb3cd5a248acbbf8a91930d13d6474f1 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 8 Mar 2017 19:03:44 +0100 Subject: [PATCH 180/705] eventpoll.h: add missing epoll event masks commit 7e040726850a106587485c21bdacc0bfc8a0cbed upstream. [resend due to me forgetting to cc: linux-api the first time around I posted these back on Feb 23] From: Greg Kroah-Hartman For some reason these values are not in the uapi header file, so any libc has to define it themselves. To prevent them from needing to do this, just have the kernel provide the correct values. Reported-by: Elliott Hughes Signed-off-by: Greg Hackmann Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/eventpoll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 1c3154913a39..bc96b14dfb2c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,6 +26,19 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Epoll event masks */ +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE (1 << 28) From 5bb5ae9718f64c3fe09f2210a7a5628055529425 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 26 Jan 2018 15:14:16 +0300 Subject: [PATCH 181/705] dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state [ Upstream commit dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 ] ccid2_hc_tx_rto_expire() timer callback always restarts the timer again and can run indefinitely (unless it is stopped outside), and after commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"), which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from dccp_destroy_sock() to sk_destruct(), this started to happen quite often. The timer prevents releasing the socket, as a result, sk_destruct() won't be called. Found with LTP/dccp_ipsec tests running on the bonding device, which later couldn't be unloaded after the tests were completed: unregister_netdevice: waiting for bond0 to become free. Usage count = 148 Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation") Signed-off-by: Alexey Kodanev Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ccids/ccid2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 5e3a7302f774..7753681195c1 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_pr_debug("RTO_EXPIRE\n"); + if (sk->sk_state == DCCP_CLOSED) + goto out; + /* back-off timer */ hc->tx_rto <<= 1; if (hc->tx_rto > DCCP_RTO_MAX) From 8b0d3e81cdecb92b12c9d7924749a6f62f855790 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 22 Jan 2018 20:06:42 +0000 Subject: [PATCH 182/705] ipv6: Fix getsockopt() for sockets with default IPV6_AUTOFLOWLABEL [ Upstream commit e9191ffb65d8e159680ce0ad2224e1acbde6985c ] Commit 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl setting") removed the initialisation of ipv6_pinfo::autoflowlabel and added a second flag to indicate whether this field or the net namespace default should be used. The getsockopt() handling for this case was not updated, so it currently returns 0 for all sockets for which IPV6_AUTOFLOWLABEL is not explicitly enabled. Fix it to return the effective value, whether that has been set at the socket or net namespace level. Fixes: 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl ...") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 615ce0abba9c..e64210c98c2b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -290,6 +290,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 388584b8ff31..6d000c0001fa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -156,7 +156,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) { if (!np->autoflowlabel_set) return ip6_default_np_autolabel(net); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6e3871c7f8f7..bcea985dd76b 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1316,7 +1316,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = np->autoflowlabel; + val = ip6_autoflowlabel(sock_net(sk), np); break; default: From fb50d8c9169ee0e90cd7a733d65cb86876d795c7 Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Wed, 10 Jan 2018 12:45:10 -0500 Subject: [PATCH 183/705] ipv6: fix udpv6 sendmsg crash caused by too small MTU [ Upstream commit 749439bfac6e1a2932c582e2699f91d329658196 ] The logic in __ip6_append_data() assumes that the MTU is at least large enough for the headers. A device's MTU may be adjusted after being added while sendmsg() is processing data, resulting in __ip6_append_data() seeing any MTU. For an mtu smaller than the size of the fragmentation header, the math results in a negative 'maxfraglen', which causes problems when refragmenting any previous skb in the skb_write_queue, leaving it possibly malformed. Instead sendmsg returns EINVAL when the mtu is calculated to be less than IPV6_MIN_MTU. Found by syzkaller: kernel BUG at ./include/linux/skbuff.h:2064! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d0b68580 task.stack: ffff8801ac6b8000 RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline] RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216 RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000 RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0 RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000 R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8 R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000 FS: 00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip6_finish_skb include/net/ipv6.h:911 [inline] udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093 udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x352/0x5a0 net/socket.c:1750 SyS_sendto+0x40/0x50 net/socket.c:1718 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512e9 RSP: 002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9 RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005 RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69 R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000 Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570 RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570 Reported-by: syzbot Signed-off-by: Mike Maloney Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6d000c0001fa..af98bbe7af0f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1260,14 +1260,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } + if (mtu < IPV6_MIN_MTU) + return -EINVAL; cork->base.fragsize = mtu; if (dst_allfrag(rt->dst.path)) cork->base.flags |= IPCORK_ALLFRAG; From c2ceff11b46e6eed2a3138811864aaa645754127 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jan 2018 22:31:18 -0800 Subject: [PATCH 184/705] ipv6: ip6_make_skb() needs to clear cork.base.dst [ Upstream commit 95ef498d977bf44ac094778fd448b98af158a3e6 ] In my last patch, I missed fact that cork.base.dst was not initialized in ip6_make_skb() : If ip6_setup_cork() returns an error, we might attempt a dst_release() on some random pointer. Fixes: 862c03ee1deb ("ipv6: fix possible mem leaks in ipv6_make_skb()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index af98bbe7af0f..2e3db3619858 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1800,6 +1800,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.flags = 0; cork.base.addr = 0; cork.base.opt = NULL; + cork.base.dst = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); if (err) { From 283498b4ca3534777cd28d17ebffd2e8f3fb0716 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Mon, 15 Jan 2018 13:24:28 -0500 Subject: [PATCH 185/705] lan78xx: Fix failure in USB Full Speed [ Upstream commit a5b1379afbfabf91e3a689e82ac619a7157336b3 ] Fix initialize the uninitialized tx_qlen to an appropriate value when USB Full Speed is used. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9c257ffedb15..c53385a0052f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2197,6 +2197,7 @@ static int lan78xx_reset(struct lan78xx_net *dev) buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; dev->rx_qlen = 4; + dev->tx_qlen = 4; } ret = lan78xx_write_reg(dev, BURST_CAP, buf); From 0ae16964f2154266f411b639ef101869ac3ae0b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 19 Jan 2018 11:50:46 +0100 Subject: [PATCH 186/705] net: igmp: fix source address check for IGMPv3 reports [ Upstream commit ad23b750933ea7bf962678972a286c78a8fa36aa ] Commit "net: igmp: Use correct source address on IGMPv3 reports" introduced a check to validate the source address of locally generated IGMPv3 packets. Instead of checking the local interface address directly, it uses inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the local subnet (or equal to the point-to-point address if used). This breaks for point-to-point interfaces, so check against ifa->ifa_local directly. Cc: Kevin Cernekee Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") Reported-by: Sebastian Gottschall Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7bff0c65046f..9c7a4cea1628 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev, return htonl(INADDR_ANY); for_ifa(in_dev) { - if (inet_ifa_match(fl4->saddr, ifa)) + if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } endfor_ifa(in_dev); From a44d91150f3365968f6d83abfb76c4dd92bda168 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2018 19:59:19 -0800 Subject: [PATCH 187/705] net: qdisc_pkt_len_init() should be more robust [ Upstream commit 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a ] Without proper validation of DODGY packets, we might very well feed qdisc_pkt_len_init() with invalid GSO packets. tcp_hdrlen() might access out-of-bound data, so let's use skb_header_pointer() and proper checks. Whole story is described in commit d0c081b49137 ("flow_dissector: properly cap thoff field") We have the goal of validating DODGY packets earlier in the stack, so we might very well revert this fix in the future. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Jason Wang Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 09007a71c8dd..67b5d4d8acb1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3083,10 +3083,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, From cf67be7a1a21c4d15593d7bacff5a59e30749b74 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 18 Jan 2018 16:14:26 -0500 Subject: [PATCH 188/705] net: tcp: close sock if net namespace is exiting [ Upstream commit 4ee806d51176ba7b8ff1efd81f271d7252e03a1d ] When a tcp socket is closed, if it detects that its net namespace is exiting, close immediately and do not wait for FIN sequence. For normal sockets, a reference is taken to their net namespace, so it will never exit while the socket is open. However, kernel sockets do not take a reference to their net namespace, so it may begin exiting while the kernel socket is still open. In this case if the kernel socket is a tcp socket, it will stay open trying to complete its close sequence. The sock's dst(s) hold a reference to their interface, which are all transferred to the namespace's loopback interface when the real interfaces are taken down. When the namespace tries to take down its loopback interface, it hangs waiting for all references to the loopback interface to release, which results in messages like: unregister_netdevice: waiting for lo to become free. Usage count = 1 These messages continue until the socket finally times out and closes. Since the net namespace cleanup holds the net_mutex while calling its registered pernet callbacks, any new net namespace initialization is blocked until the current net namespace finishes exiting. After this change, the tcp socket notices the exiting net namespace, and closes immediately, releasing its dst(s) and their reference to the loopback interface, which lets the net namespace continue exiting. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 Signed-off-by: Dan Streetman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/net_namespace.h | 10 ++++++++++ net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_timer.c | 15 +++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0940598c002f..23102da24dd9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -213,6 +213,11 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } +static inline int check_net(const struct net *net) +{ + return atomic_read(&net->count) != 0; +} + void net_drop_ns(void *); #else @@ -237,6 +242,11 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 05d2bde00864..7efa6b062049 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2215,6 +2215,9 @@ adjudge_to_death: tcp_send_active_reset(sk, GFP_ATOMIC); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + } else if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_set_state(sk, TCP_CLOSE); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74db43b47917..69523389f067 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *sk) * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. * + * Also close if our net namespace is exiting; in that case there is no + * hope of ever communicating again since all netns interfaces are already + * down (or about to be down), and we need to release our dst references, + * which have been moved to the netns loopback interface, so the namespace + * can finish exiting. This condition is only possible if we are a kernel + * socket, as those do not hold references to the namespace. + * * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. * 2. If we have strong memory pressure. + * 3. If our net namespace is exiting. */ static int tcp_out_of_resources(struct sock *sk, bool do_reset) { @@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } + + if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_done(sk); + return 1; + } + return 0; } From 1bd21b158e07e0b8c5a2ce832305a0ebfe42c480 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 22 Jan 2018 18:06:37 +0100 Subject: [PATCH 189/705] pppoe: take ->needed_headroom of lower device into account on xmit [ Upstream commit 02612bb05e51df8489db5e94d0cf8d1c81f87b0c ] In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom was probably fine before the introduction of ->needed_headroom in commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom"). But now, virtual devices typically advertise the size of their overhead in dev->needed_headroom, so we must also take it into account in skb_reserve(). Allocation size of skb is also updated to take dev->needed_tailroom into account and replace the arbitrary 32 bytes with the real size of a PPPoE header. This issue was discovered by syzbot, who connected a pppoe socket to a gre device which had dev->header_ops->create == ipgre_header and dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any headroom, and dev_hard_header() crashed when ipgre_header() tried to prepend its header to skb->data. skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24 head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted 4.15.0-rc7-next-20180115+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100 RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282 RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000 RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0 R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180 FS: 00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_under_panic net/core/skbuff.c:114 [inline] skb_push+0xce/0xf0 net/core/skbuff.c:1714 ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879 dev_hard_header include/linux/netdevice.h:2723 [inline] pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg+0xca/0x110 net/socket.c:640 sock_write_iter+0x31a/0x5d0 net/socket.c:909 call_write_iter include/linux/fs.h:1775 [inline] do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 do_iter_write+0x154/0x540 fs/read_write.c:932 vfs_writev+0x18a/0x340 fs/read_write.c:977 do_writev+0xfc/0x2a0 fs/read_write.c:1012 SYSC_writev fs/read_write.c:1085 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1082 entry_SYSCALL_64_fastpath+0x29/0xa0 Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like interfaces, but reserving space for ->needed_headroom is a more fundamental issue that needs to be addressed first. Same problem exists for __pppoe_xmit(), which also needs to take dev->needed_headroom into account in skb_cow_head(). Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4ddae8118c85..dc36c2ec1d10 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, struct pppoe_hdr *ph; struct net_device *dev; char *start; + int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { @@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, if (total_len > (dev->mtu + dev->hard_header_len)) goto end; - - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, - 0, GFP_KERNEL); + hlen = LL_RESERVED_SPACE(dev); + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; @@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) /* Copy the data if there is no space for the header or if it's * read-only. */ - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); From 0f51492d1bd5f7376d8175edd266b808d58df21f Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 26 Jan 2018 01:53:26 +0100 Subject: [PATCH 190/705] r8169: fix memory corruption on retrieval of hardware statistics. [ Upstream commit a78e93661c5fd30b9e1dee464b2f62f966883ef7 ] Hardware statistics retrieval hurts in tight invocation loops. Avoid extraneous write and enforce strict ordering of writes targeted to the tally counters dump area address registers. Signed-off-by: Francois Romieu Tested-by: Oliver Freyermuth Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2c4350a1c629..298b74ebc1e9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2222,19 +2222,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) void __iomem *ioaddr = tp->mmio_addr; dma_addr_t paddr = tp->counters_phys_addr; u32 cmd; - bool ret; RTL_W32(CounterAddrHigh, (u64)paddr >> 32); + RTL_R32(CounterAddrHigh); cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(CounterAddrLow, cmd); RTL_W32(CounterAddrLow, cmd | counter_cmd); - ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); - - RTL_W32(CounterAddrLow, 0); - RTL_W32(CounterAddrHigh, 0); - - return ret; + return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); } static bool rtl8169_reset_counters(struct net_device *dev) From 8e3534ea657e8e072e480b4a36c743458c5c852d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:02:00 +0800 Subject: [PATCH 191/705] sctp: do not allow the v4 socket to bind a v4mapped v6 address [ Upstream commit c5006b8aa74599ce19104b31d322d2ea9ff887cc ] The check in sctp_sockaddr_af is not robust enough to forbid binding a v4mapped v6 addr on a v4 socket. The worse thing is that v4 socket's bind_verify would not convert this v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4 socket bound a v6 addr. This patch is to fix it by doing the common sa.sa_family check first, then AF_INET check for v4mapped v6 addrs. Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.") Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com Acked-by: Neil Horman Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7181ce6c62bf..b9260d4029d8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -332,16 +332,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (len < sizeof (struct sockaddr)) return NULL; + if (!opt->pf->af_supported(addr->sa.sa_family, opt)) + return NULL; + /* V4 mapped address are really of AF_INET family */ if (addr->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { - if (!opt->pf->af_supported(AF_INET, opt)) - return NULL; - } else { - /* Does this PF support this AF? */ - if (!opt->pf->af_supported(addr->sa.sa_family, opt)) - return NULL; - } + ipv6_addr_v4mapped(&addr->v6.sin6_addr) && + !opt->pf->af_supported(AF_INET, opt)) + return NULL; /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); From 2f056e7def4236bf5279da38410c93acee164d5a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:01:36 +0800 Subject: [PATCH 192/705] sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf [ Upstream commit a0ff660058b88d12625a783ce9e5c1371c87951f ] After commit cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep"), it may change to lock another sk if the asoc has been peeled off in sctp_wait_for_sndbuf. However, the asoc's new sk could be already closed elsewhere, as it's in the sendmsg context of the old sk that can't avoid the new sk's closing. If the sk's last one refcnt is held by this asoc, later on after putting this asoc, the new sk will be freed, while under it's own lock. This patch is to revert that commit, but fix the old issue by returning error under the old sk's lock. Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep") Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9260d4029d8..c472b8391dde 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -83,7 +83,7 @@ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len, struct sock **orig_sk); + size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1956,7 +1956,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { /* sk can be changed by peel off when waiting for buf. */ - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) { if (err == -ESRCH) { /* asoc is already dead. */ @@ -7439,12 +7439,12 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len, struct sock **orig_sk) + size_t msg_len) { struct sock *sk = asoc->base.sk; - int err = 0; long current_timeo = *timeo_p; DEFINE_WAIT(wait); + int err = 0; pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); @@ -7473,17 +7473,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); - if (sk != asoc->base.sk) { - release_sock(sk); - sk = asoc->base.sk; - lock_sock(sk); - } + if (sk != asoc->base.sk) + goto do_error; *timeo_p = current_timeo; } out: - *orig_sk = sk; finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ From 0e52703d0746ee35326623c5442e9eec0139ffeb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 10 Jan 2018 12:50:25 -0800 Subject: [PATCH 193/705] tipc: fix a memory leak in tipc_nl_node_get_link() [ Upstream commit 59b36613e85fb16ebf9feaf914570879cd5c2a21 ] When tipc_node_find_by_name() fails, the nlmsg is not freed. While on it, switch to a goto label to properly free it. Fixes: be9c086715c ("tipc: narrow down exposure of struct tipc_node") Reported-by: Dmitry Vyukov Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/node.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 27753325e06e..5b3e1ea37b6d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1848,36 +1848,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) { err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } else { int bearer_id; struct tipc_node *node; struct tipc_link *link; node = tipc_node_find_by_name(net, name, &bearer_id); - if (!node) - return -EINVAL; + if (!node) { + err = -EINVAL; + goto err_free; + } tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; + err = -EINVAL; + goto err_free; } err = __tipc_nl_add_link(net, &msg, link, 0); tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; } int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) From 66c16a22e3b141152c2bc85236b48372b2b1e984 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Jan 2018 16:06:37 -0500 Subject: [PATCH 194/705] vmxnet3: repair memory leak [ Upstream commit 848b159835ddef99cc4193083f7e786c3992f580 ] with the introduction of commit b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that rq->buf_info is improperly handled. While it is heap allocated when an rx queue is setup, and freed when torn down, an old line of code in vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0] being set to NULL prior to its being freed, causing a memory leak, which eventually exhausts the system on repeated create/destroy operations (for example, when the mtu of a vmxnet3 interface is changed frequently. Fix is pretty straight forward, just move the NULL set to after the free. Tested by myself with successful results Applies to net, and should likely be queued for stable, please Signed-off-by: Neil Horman Reported-By: boyang@redhat.com CC: boyang@redhat.com CC: Shrikrishna Khare CC: "VMware, Inc." CC: David S. Miller Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ef83ae3b0a44..4afba17e2403 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, rq->rx_ring[i].basePA); rq->rx_ring[i].base = NULL; } - rq->buf_info[i] = NULL; } if (rq->data_ring.base) { @@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, (rq->rx_ring[0].size + rq->rx_ring[1].size); dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], rq->buf_info_pa); + rq->buf_info[0] = rq->buf_info[1] = NULL; } } From 014510b11781c7ba17b4e2c25720ce15233c712f Mon Sep 17 00:00:00 2001 From: Jim Westfall Date: Sun, 14 Jan 2018 04:18:50 -0800 Subject: [PATCH 195/705] net: Allow neigh contructor functions ability to modify the primary_key [ Upstream commit 096b9854c04df86f03b38a97d40b6506e5730919 ] Use n->primary_key instead of pkey to account for the possibility that a neigh constructor function may have modified the primary_key value. Signed-off-by: Jim Westfall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f45f6198851f..7b315663f840 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -496,7 +496,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); @@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, n1 != NULL; n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { - if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { + if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); rc = n1; From 260eb694b5a4bd3424a7c445128fb5a784e95e3d Mon Sep 17 00:00:00 2001 From: Jim Westfall Date: Sun, 14 Jan 2018 04:18:51 -0800 Subject: [PATCH 196/705] ipv4: Make neigh lookup keys for loopback/point-to-point devices be INADDR_ANY [ Upstream commit cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 ] Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices to avoid making an entry for every remote ip the device needs to talk to. This used the be the old behavior but became broken in a263b3093641f (ipv4: Make neigh lookups directly in output packet path) and later removed in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point devices) because it was broken. Signed-off-by: Jim Westfall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/arp.h | 3 +++ net/ipv4/arp.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c..1b3f86981757 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -19,6 +19,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = INADDR_ANY; + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 51b27ae09fbd..e60517eb1c3a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) static int arp_constructor(struct neighbour *neigh) { - __be32 addr = *(__be32 *)neigh->primary_key; + __be32 addr; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; + u32 inaddr_any = INADDR_ANY; + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); + + addr = *(__be32 *)neigh->primary_key; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { From 00f9e47c6f9d9d25e1bf9cd5f58652d74e36d567 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 10 Jan 2018 16:24:45 +0100 Subject: [PATCH 197/705] ppp: unlock all_ppp_mutex before registering device [ Upstream commit 0171c41835591e9aa2e384b703ef9a6ae367c610 ] ppp_dev_uninit(), which is the .ndo_uninit() handler of PPP devices, needs to lock pn->all_ppp_mutex. Therefore we mustn't call register_netdevice() with pn->all_ppp_mutex already locked, or we'd deadlock in case register_netdevice() fails and calls .ndo_uninit(). Fortunately, we can unlock pn->all_ppp_mutex before calling register_netdevice(). This lock protects pn->units_idr, which isn't used in the device registration process. However, keeping pn->all_ppp_mutex locked during device registration did ensure that no device in transient state would be published in pn->units_idr. In practice, unlocking it before calling register_netdevice() doesn't change this property: ppp_unit_register() is called with 'ppp_mutex' locked and all searches done in pn->units_idr hold this lock too. Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") Reported-and-tested-by: syzbot+367889b9c9e279219175@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index b883af93929c..fc4c2ccc3d22 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1002,17 +1002,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) if (!ifname_is_set) snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + mutex_unlock(&pn->all_ppp_mutex); + ret = register_netdevice(ppp->dev); if (ret < 0) goto err_unit; atomic_inc(&ppp_unit_count); - mutex_unlock(&pn->all_ppp_mutex); - return 0; err_unit: + mutex_lock(&pn->all_ppp_mutex); unit_put(&pn->units_idr, ppp->file.index); err: mutex_unlock(&pn->all_ppp_mutex); From 1711ba166e5f7148bc41b5e24f7f282ffc055515 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 19 Jan 2018 20:23:50 +0100 Subject: [PATCH 198/705] be2net: restore properly promisc mode after queues reconfiguration [ Upstream commit 52acf06451930eb4cefabd5ecea56e2d46c32f76 ] The commit 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") modified be_update_queues() so the IFACE (HW representation of the netdevice) is destroyed and then re-created. This causes a regression because potential promiscuous mode is not restored properly during be_open() because the driver thinks that the HW has promiscuous mode already enabled. Note that Lancer is not affected by this bug because RX-filter flags are disabled during be_close() for this chipset. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Fixes: 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 1644896568c4..b2eeecb26939 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4733,6 +4733,15 @@ int be_update_queues(struct be_adapter *adapter) be_schedule_worker(adapter); + /* + * The IF was destroyed and re-created. We need to clear + * all promiscuous flags valid for the destroyed IF. + * Without this promisc mode is not restored during + * be_open() because the driver thinks that it is + * already enabled in HW. + */ + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + if (netif_running(netdev)) status = be_open(netdev); From cc99c6d59adf7daf62522b09e25af72267c997c8 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 18 Jan 2018 20:51:12 +0300 Subject: [PATCH 199/705] ip6_gre: init dev->mtu and dev->hard_header_len correctly [ Upstream commit 128bb975dc3c25d00de04e503e2fe0a780d04459 ] Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") moved dev->mtu initialization from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a result, the previously set values, before ndo_init(), are reset in the following cases: * rtnl_create_link() can update dev->mtu from IFLA_MTU parameter. * ip6gre_tnl_link_config() is invoked before ndo_init() in netlink and ioctl setup, so ndo_init() can reset MTU adjustments with the lower device MTU as well, dev->mtu and dev->hard_header_len. Not applicable for ip6gretap because it has one more call to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init(). Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]' parameter if a user sets it manually on a device creation, and fix the second one by moving ip6gre_tnl_link_config() call after register_netdevice(). Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c46066c5dc27..db2613b4a049 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, 1); if (register_netdevice(dev) < 0) goto failed_free; + ip6gre_tnl_link_config(nt, 1); + /* Can use a lockless transmit, unless we generate output sequences */ if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; @@ -1263,7 +1264,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { - struct ip6_tnl *tunnel; int ret; ret = ip6gre_tunnel_init_common(dev); @@ -1272,10 +1272,6 @@ static int ip6gre_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - tunnel = netdev_priv(dev); - - ip6gre_tnl_link_config(tunnel, 1); - return 0; } @@ -1370,7 +1366,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); dev->features |= GRE6_FEATURES; dev->hw_features |= GRE6_FEATURES; @@ -1396,6 +1391,11 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, if (err) goto out; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + if (tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + dev_hold(dev); ip6gre_tunnel_link(ign, nt); From 3110e2134c9718a1f3c091873769d1d9b621bb87 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 19 Jan 2018 09:29:18 -0500 Subject: [PATCH 200/705] gso: validate gso_type in GSO handlers [ Upstream commit 121d57af308d0cf943f08f4738d24d3966c38cd9 ] Validate gso_type during segmentation as SKB_GSO_DODGY sources may pass packets where the gso_type does not match the contents. Syzkaller was able to enter the SCTP gso handler with a packet of gso_type SKB_GSO_TCPV4. On entry of transport layer gso handlers, verify that the gso_type matches the transport protocol. Fixes: 90017accff61 ("sctp: Add GSO support") Link: http://lkml.kernel.org/r/<001a1137452496ffc305617e5fe0@google.com> Reported-by: syzbot+fee64147a25aecd48055@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_offload.c | 3 +++ net/ipv4/udp_offload.c | 3 +++ net/ipv6/tcpv6_offload.c | 3 +++ net/ipv6/udp_offload.c | 3 +++ net/sctp/offload.c | 3 +++ 5 files changed, 15 insertions(+) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea86..366b1becff9d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6401574cd638..f4f616eaaeb8 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -205,6 +205,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, { struct tcphdr *th; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7d378c032cb..2bd2087bd105 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -55,6 +55,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 4f5a2b580aa5..6300f28c9588 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -44,6 +44,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, sizeof(*sh))) goto out; From 1105145cb3d5eee496de1d55aaea160c78e1c5b5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 24 Jan 2018 10:02:09 +0100 Subject: [PATCH 201/705] mlxsw: spectrum_router: Don't log an error on missing neighbor [ Upstream commit 1ecdaea02ca6bfacf2ecda500dc1af51e9780c42 ] Driver periodically samples all neighbors configured in device in order to update the kernel regarding their state. When finding an entry configured in HW that doesn't show in neigh_lookup() driver logs an error message. This introduces a race when removing multiple neighbors - it's possible that a given entry would still be configured in HW as its removal is still being processed but is already removed from the kernel's neighbor tables. Simply remove the error message and gracefully accept such events. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Fixes: 60f040ca11b9 ("mlxsw: spectrum_router: Periodically dump active IPv6 neighbours") Signed-off-by: Yuval Mintz Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 8aa91ddff287..16556011d571 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -765,11 +765,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, dipn = htonl(dip); dev = mlxsw_sp->rifs[rif]->dev; n = neigh_lookup(&arp_tbl, &dipn, dev); - if (!n) { - netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); neigh_event_send(n, NULL); From 18717ee28ef5c0285ff969d1e9357529a8a9233f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 15 Jan 2018 11:37:29 -0800 Subject: [PATCH 202/705] tun: fix a memory leak for tfile->tx_array [ Upstream commit 4df0bfc79904b7169dc77dcce44598b1545721f9 ] tfile->tun could be detached before we close the tun fd, via tun_detach_all(), so it should not be used to check for tfile->tx_array. As Jason suggested, we probably have to clean it up unconditionally both in __tun_deatch() and tun_detach_all(), but this requires to check if it is initialized or not. Currently skb_array_cleanup() doesn't have such a check, so I check it in the caller and introduce a helper function, it is a bit ugly but we can always improve it in net-next. Reported-by: Dmitry Vyukov Fixes: 1576d9860599 ("tun: switch to use skb array for tx") Cc: Jason Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 518cbfbc8b65..eb6dc28e5e52 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -525,6 +525,14 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } +static void tun_cleanup_tx_array(struct tun_file *tfile) +{ + if (tfile->tx_array.ring.queue) { + skb_array_cleanup(&tfile->tx_array); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + } +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -566,8 +574,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) - skb_array_cleanup(&tfile->tx_array); + tun_cleanup_tx_array(tfile); sock_put(&tfile->sk); } } @@ -606,11 +613,13 @@ static void tun_detach_all(struct net_device *dev) /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } BUG_ON(tun->numdisabled != 0); @@ -2363,6 +2372,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + return 0; } From eecfa2eeefe3caef4c8d6b2284f6066c76ab26a1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jan 2018 14:21:13 -0800 Subject: [PATCH 203/705] flow_dissector: properly cap thoff field [ Upstream commit d0c081b49137cd3200f2023c0875723be66e7ce5 ] syzbot reported yet another crash [1] that is caused by insufficient validation of DODGY packets. Two bugs are happening here to trigger the crash. 1) Flow dissection leaves with incorrect thoff field. 2) skb_probe_transport_header() sets transport header to this invalid thoff, even if pointing after skb valid data. 3) qdisc_pkt_len_init() reads out-of-bound data because it trusts tcp_hdrlen(skb) Possible fixes : - Full flow dissector validation before injecting bad DODGY packets in the stack. This approach was attempted here : https://patchwork.ozlabs.org/patch/ 861874/ - Have more robust functions in the core. This might be needed anyway for stable versions. This patch fixes the flow dissection issue. [1] CPU: 1 PID: 3144 Comm: syzkaller271204 Not tainted 4.15.0-rc4-mm1+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:355 [inline] kasan_report+0x23b/0x360 mm/kasan/report.c:413 __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:432 __tcp_hdrlen include/linux/tcp.h:35 [inline] tcp_hdrlen include/linux/tcp.h:40 [inline] qdisc_pkt_len_init net/core/dev.c:3160 [inline] __dev_queue_xmit+0x20d3/0x2200 net/core/dev.c:3465 dev_queue_xmit+0x17/0x20 net/core/dev.c:3554 packet_snd net/packet/af_packet.c:2943 [inline] packet_sendmsg+0x3ad5/0x60a0 net/packet/af_packet.c:2968 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 sock_write_iter+0x31a/0x5d0 net/socket.c:907 call_write_iter include/linux/fs.h:1776 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x684/0x970 fs/read_write.c:482 vfs_write+0x189/0x510 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0xef/0x220 fs/read_write.c:581 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 34fad54c2537 ("net: __skb_flow_dissect() must cap its return value") Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 32e4e0158846..862d63ec56e4 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -550,8 +550,8 @@ ip_proto_again: out_good: ret = true; - key_control->thoff = (u16)nhoff; out: + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; @@ -559,7 +559,6 @@ out: out_bad: ret = false; - key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); goto out; } EXPORT_SYMBOL(__skb_flow_dissect); From dc1932c69835928dcb6259c744043dd03028cdee Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Mon, 22 Jan 2018 14:12:52 +0800 Subject: [PATCH 204/705] perf/x86/amd/power: Do not load AMD power module on !AMD platforms commit 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 upstream. The AMD power module can be loaded on non AMD platforms, but unload fails with the following Oops: BUG: unable to handle kernel NULL pointer dereference at (null) IP: __list_del_entry_valid+0x29/0x90 Call Trace: perf_pmu_unregister+0x25/0xf0 amd_power_pmu_exit+0x1c/0xd23 [power] SyS_delete_module+0x1a8/0x2b0 ? exit_to_usermode_loop+0x8f/0xb0 entry_SYSCALL_64_fastpath+0x20/0x83 Return -ENODEV instead of 0 from the module init function if the CPU does not match. Fixes: c7ab62bfbe0e ("perf/x86/amd/power: Add AMD accumulated power reporting mechanism") Signed-off-by: Xiao Liang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180122061252.6394-1-xiliang@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 9842270ed2f2..21a4e4127f43 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void) int ret; if (!x86_match_cpu(cpu_match)) - return 0; + return -ENODEV; if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) return -ENODEV; From 9f3a6cadf494f378b7839de14063793259ad5626 Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Tue, 23 Jan 2018 11:41:32 +0100 Subject: [PATCH 205/705] x86/microcode/intel: Extend BDW late-loading further with LLC size check commit 7e702d17ed138cf4ae7c00e8c00681ed464587c7 upstream. Commit b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") reduced the impact of erratum BDF90 for Broadwell model 79. The impact can be reduced further by checking the size of the last level cache portion per core. Tony: "The erratum says the problem only occurs on the large-cache SKUs. So we only need to avoid the update if we are on a big cache SKU that is also running old microcode." For more details, see erratum BDF90 in document #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family Specification Update) from September 2017. Fixes: b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") Signed-off-by: Jia Zhang Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Tony Luck Link: https://lkml.kernel.org/r/1516321542-31161-1-git-send-email-zhang.jia@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ac3e636ad586..f90f17610f62 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -40,6 +40,9 @@ #include #include +/* last level cache size per core */ +static int llc_size_per_core; + /* * Temporary microcode blobs pointers storage. We note here during early load * the pointers to microcode blobs we've got from whatever storage (detached @@ -1053,12 +1056,14 @@ static bool is_blacklisted(unsigned int cpu) /* * Late loading on model 79 with microcode revision less than 0x0b000021 - * may result in a system hang. This behavior is documented in item - * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + * and LLC size per core bigger than 2.5MB may result in a system hang. + * This behavior is documented in item BDF90, #334165 (Intel Xeon + * Processor E7-8800/4800 v4 Product Family). */ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && c->x86_mask == 0x01 && + llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); @@ -1125,6 +1130,15 @@ static struct microcode_ops microcode_intel_ops = { .microcode_fini_cpu = microcode_fini_cpu, }; +static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) +{ + u64 llc_size = c->x86_cache_size * 1024; + + do_div(llc_size, c->x86_max_cores); + + return (int)llc_size; +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1135,6 +1149,8 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + llc_size_per_core = calc_llc_size_per_core(c); + return µcode_intel_ops; } From c98ff7299b404f110167883695f81080723e6e15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Jan 2018 14:54:32 +0100 Subject: [PATCH 206/705] hrtimer: Reset hrtimer cpu base proper on CPU hotplug commit d5421ea43d30701e03cadc56a38854c36a8b4433 upstream. The hrtimer interrupt code contains a hang detection and mitigation mechanism, which prevents that a long delayed hrtimer interrupt causes a continous retriggering of interrupts which prevent the system from making progress. If a hang is detected then the timer hardware is programmed with a certain delay into the future and a flag is set in the hrtimer cpu base which prevents newly enqueued timers from reprogramming the timer hardware prior to the chosen delay. The subsequent hrtimer interrupt after the delay clears the flag and resumes normal operation. If such a hang happens in the last hrtimer interrupt before a CPU is unplugged then the hang_detected flag is set and stays that way when the CPU is plugged in again. At that point the timer hardware is not armed and it cannot be armed because the hang_detected flag is still active, so nothing clears that flag. As a consequence the CPU does not receive hrtimer interrupts and no timers expire on that CPU which results in RCU stalls and other malfunctions. Clear the flag along with some other less critical members of the hrtimer cpu base to ensure starting from a clean state when a CPU is plugged in. Thanks to Paul, Sebastian and Anna-Maria for their help to get down to the root cause of that hard to reproduce heisenbug. Once understood it's trivial and certainly justifies a brown paperbag. Fixes: 41d2e4949377 ("hrtimer: Tune hrtimer_interrupt hang logic") Reported-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Sewior Cc: Anna-Maria Gleixner Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801261447590.2067@nanos Signed-off-by: Greg Kroah-Hartman --- kernel/time/hrtimer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index eeb7f2f5698d..54fd2fed36e9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -652,7 +652,9 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next.tv64 = KTIME_MAX; + base->hang_detected = 0; base->hres_active = 0; + base->next_timer = NULL; } /* @@ -1610,6 +1612,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) timerqueue_init_head(&cpu_base->clock_base[i].active); } + cpu_base->active_bases = 0; cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); return 0; From c964ad34f6d9c5c907e5cc2f1a855d044346aa8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jan 2018 02:48:54 +0100 Subject: [PATCH 207/705] x86: bpf_jit: small optimization in emit_bpf_tail_call() [ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ] Saves 4 bytes replacing following instructions : lea rax, [rsi + rdx * 8 + offsetof(...)] mov rax, qword ptr [rax] cmp rax, 0 by : mov rax, [rsi + rdx * 8 + offsetof(...)] test rax, rax Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 15f743615923..ece29e27faa0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -281,7 +281,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 47 /* number of bytes to jump */ +#define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 36 +#define OFFSET2 32 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->ptrs[index]; */ - EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); - EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) * goto out; */ - EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ #define OFFSET3 10 EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; From 5226bb3b95515d7f6f2e1c11ac78b612e0056342 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jan 2018 02:48:55 +0100 Subject: [PATCH 208/705] bpf: fix bpf_tail_call() x64 JIT [ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ] - bpf prog_array just like all other types of bpf array accepts 32-bit index. Clarify that in the comment. - fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes - tighten corresponding check in the interpreter to stay consistent The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and though JIT code is wrong it will check bounds correctly. Hence two fixes tags. All other JITs don't have this problem. Signed-off-by: Alexei Starovoitov Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 4 ++-- kernel/bpf/core.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ece29e27faa0..7840331d3056 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -278,9 +278,9 @@ static void emit_bpf_tail_call(u8 **pprog) /* if (index >= array->map.max_entries) * goto out; */ - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ #define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa6d98154106..ab9576b3bde5 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -715,7 +715,7 @@ select_insn: struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog; - u64 index = BPF_R3; + u32 index = BPF_R3; if (unlikely(index >= array->map.max_entries)) goto out; From a3d6dd6a66c1bf01a36926705db4687c7d0d4734 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jan 2018 02:48:56 +0100 Subject: [PATCH 209/705] bpf: introduce BPF_JIT_ALWAYS_ON config [ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ] The BPF interpreter has been used as part of the spectre 2 attack CVE-2017-5715. A quote from goolge project zero blog: "At this point, it would normally be necessary to locate gadgets in the host kernel code that can be used to actually leak data by reading from an attacker-controlled location, shifting and masking the result appropriately and then using the result of that as offset to an attacker-controlled address for a load. But piecing gadgets together and figuring out which ones work in a speculation context seems annoying. So instead, we decided to use the eBPF interpreter, which is built into the host kernel - while there is no legitimate way to invoke it from inside a VM, the presence of the code in the host kernel's text section is sufficient to make it usable for the attack, just like with ordinary ROP gadgets." To make attacker job harder introduce BPF_JIT_ALWAYS_ON config option that removes interpreter from the kernel in favor of JIT-only mode. So far eBPF JIT is supported by: x64, arm64, arm32, sparc64, s390, powerpc64, mips64 The start of JITed program is randomized and code page is marked as read-only. In addition "constant blinding" can be turned on with net.core.bpf_jit_harden v2->v3: - move __bpf_prog_ret0 under ifdef (Daniel) v1->v2: - fix init order, test_bpf and cBPF (Daniel's feedback) - fix offloaded bpf (Jakub's feedback) - add 'return 0' dummy in case something can invoke prog->bpf_func - retarget bpf tree. For bpf-next the patch would need one extra hunk. It will be sent when the trees are merged back to net-next Considered doing: int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT; but it seems better to land the patch as-is and in bpf-next remove bpf_jit_enable global variable from all JITs, consolidate in one place and remove this jit_init() function. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 7 +++++++ kernel/bpf/core.c | 18 ++++++++++++++++++ lib/test_bpf.c | 11 +++++++---- net/core/filter.c | 6 ++---- net/core/sysctl_net_core.c | 6 ++++++ net/socket.c | 9 +++++++++ 6 files changed, 49 insertions(+), 8 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 34407f15e6d3..b331feeabda4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1609,6 +1609,13 @@ config BPF_SYSCALL Enable the bpf() system call that allows to manipulate eBPF programs and maps via file descriptors. +config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid + speculative execution of BPF instructions by the interpreter + config SHMEM bool "Use full shmem filesystem" if EXPERT default y diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ab9576b3bde5..64c4b13952f0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } EXPORT_SYMBOL_GPL(__bpf_call_base); +#ifndef CONFIG_BPF_JIT_ALWAYS_ON /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -923,6 +924,13 @@ load_byte: } STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ +#else +static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn) +{ + return 0; +} +#endif + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { @@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { +#ifndef CONFIG_BPF_JIT_ALWAYS_ON fp->bpf_func = (void *) __bpf_prog_run; +#else + fp->bpf_func = (void *) __bpf_prog_ret0; +#endif /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during @@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * be JITed, but falls back to the interpreter. */ fp = bpf_int_jit_compile(fp); +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + if (!fp->jited) { + *err = -ENOTSUPP; + return fp; + } +#endif bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2e385026915c..98da7520a6aa 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5646,9 +5646,8 @@ static struct bpf_prog *generate_filter(int which, int *err) return NULL; } } - /* We don't expect to fail. */ if (*err) { - pr_cont("FAIL to attach err=%d len=%d\n", + pr_cont("FAIL to prog_create err=%d len=%d\n", *err, fprog.len); return NULL; } @@ -5671,6 +5670,10 @@ static struct bpf_prog *generate_filter(int which, int *err) * checks. */ fp = bpf_prog_select_runtime(fp, err); + if (*err) { + pr_cont("FAIL to select_runtime err=%d\n", *err); + return NULL; + } break; } @@ -5856,8 +5859,8 @@ static __init int test_bpf(void) pass_cnt++; continue; } - - return err; + err_cnt++; + continue; } pr_cont("jited:%u ", fp->jited); diff --git a/net/core/filter.c b/net/core/filter.c index 4eb4ce0aeef4..6e0d17bc8382 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1005,11 +1005,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) */ goto out_err_free; - /* We are guaranteed to never error here with cBPF to eBPF - * transitions, since there's no issue with type compatibility - * checks on program arrays. - */ fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err_free; kfree(old_prog); return fp; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a7f05f0130e8..1b4619008c4e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, +#ifndef CONFIG_BPF_JIT_ALWAYS_ON .proc_handler = proc_dointvec +#else + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, +#endif }, # ifdef CONFIG_HAVE_EBPF_JIT { diff --git a/net/socket.c b/net/socket.c index 05f13b24572c..bd3b33988ee0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2548,6 +2548,15 @@ out_fs: core_initcall(sock_init); /* early initcall */ +static int __init jit_init(void) +{ +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + bpf_jit_enable = 1; +#endif + return 0; +} +pure_initcall(jit_init); + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { From fcabc6d008856356258f86e96bfcf3806acf9f38 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 29 Jan 2018 02:48:57 +0100 Subject: [PATCH 210/705] bpf: arsh is not supported in 32 bit alu thus reject it [ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ] The following snippet was throwing an 'unknown opcode cc' warning in BPF interpreter: 0: (18) r0 = 0x0 2: (7b) *(u64 *)(r10 -16) = r0 3: (cc) (u32) r0 s>>= (u32) r0 4: (95) exit Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X} generation, not all of them do and interpreter does neither. We can leave existing ones and implement it later in bpf-next for the remaining ones, but reject this properly in verifier for the time being. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 19c44cf59bb2..8e0bf890ffb7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1843,6 +1843,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { + verbose("BPF_ARSH not supported for 32 bit ALU\n"); + return -EINVAL; + } + if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; From 5cb917aa1f1e03df9a4c29b363e3900d73508fa8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 29 Jan 2018 02:48:58 +0100 Subject: [PATCH 211/705] bpf: avoid false sharing of map refcount with max_entries [ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ] In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds speculation") also change the layout of struct bpf_map such that false sharing of fast-path members like max_entries is avoided when the maps reference counter is altered. Therefore enforce them to be placed into separate cachelines. pahole dump after change: struct bpf_map { const struct bpf_map_ops * ops; /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ u32 pages; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ bool unpriv_array; /* 56 1 */ /* XXX 7 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ struct user_struct * user; /* 64 8 */ atomic_t refcnt; /* 72 4 */ atomic_t usercnt; /* 76 4 */ struct work_struct work; /* 80 32 */ char name[16]; /* 112 16 */ /* --- cacheline 2 boundary (128 bytes) --- */ /* size: 128, cachelines: 2, members: 17 */ /* sum members: 121, holes: 1, sum holes: 7 */ }; Now all entries in the first cacheline are read only throughout the life time of the map, set up once during map creation. Overall struct size and number of cachelines doesn't change from the reordering. struct bpf_map is usually first member and embedded in map structs in specific map implementations, so also avoid those members to sit at the end where it could potentially share the cacheline with first map values e.g. in the array since remote CPUs could trigger map updates just as well for those (easily dirtying members like max_entries intentionally as well) while having subsequent values in cache. Quoting from Google's Project Zero blog [1]: Additionally, at least on the Intel machine on which this was tested, bouncing modified cache lines between cores is slow, apparently because the MESI protocol is used for cache coherence [8]. Changing the reference counter of an eBPF array on one physical CPU core causes the cache line containing the reference counter to be bounced over to that CPU core, making reads of the reference counter on all other CPU cores slow until the changed reference counter has been written back to memory. Because the length and the reference counter of an eBPF array are stored in the same cache line, this also means that changing the reference counter on one physical CPU core causes reads of the eBPF array's length to be slow on other physical CPU cores (intentional false sharing). While this doesn't 'control' the out-of-bounds speculation through masking the index as in commit b2157399cc98, triggering a manipulation of the map's reference counter is really trivial, so lets not allow to easily affect max_entries from it. Splitting to separate cachelines also generally makes sense from a performance perspective anyway in that fast-path won't have a cache miss if the map gets pinned, reused in other progs, etc out of control path, thus also avoids unintentional false sharing. [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75ffd3b2149e..7995940d4187 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,7 +36,10 @@ struct bpf_map_ops { }; struct bpf_map { - atomic_t refcnt; + /* 1st cacheline with read-mostly members of which some + * are also accessed in fast-path (e.g. ops, max_entries). + */ + const struct bpf_map_ops *ops ____cacheline_aligned; enum bpf_map_type map_type; u32 key_size; u32 value_size; @@ -44,10 +47,15 @@ struct bpf_map { u32 map_flags; u32 pages; bool unpriv_array; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; + /* 7 bytes hole */ + + /* 2nd cacheline with misc members to avoid false sharing + * particularly with refcounting. + */ + struct user_struct *user ____cacheline_aligned; + atomic_t refcnt; atomic_t usercnt; + struct work_struct work; }; struct bpf_map_type_list { From 4606077802f2c6ef7aff5185d9f7d99a50784ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Jan 2018 02:48:59 +0100 Subject: [PATCH 212/705] bpf: fix divides by zero [ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ] Divides by zero are not nice, lets avoid them if possible. Also do_div() seems not needed when dealing with 32bit operands, but this seems a minor detail. Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 64c4b13952f0..879ca844ba1d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -642,7 +642,7 @@ select_insn: DST = tmp; CONT; ALU_MOD_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; DST = do_div(tmp, (u32) SRC); @@ -661,7 +661,7 @@ select_insn: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; do_div(tmp, (u32) SRC); From 265d7657c9baf09d57eb386d0374e912e9649626 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jan 2018 02:49:00 +0100 Subject: [PATCH 213/705] bpf: fix 32-bit divide by zero [ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ] due to some JITs doing if (src_reg == 0) check in 64-bit mode for div/mod operations mask upper 32-bits of src register before doing the check Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.") Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 18 ++++++++++++++++++ net/core/filter.c | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8e0bf890ffb7..5c56ff9eeed2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3391,6 +3391,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || + insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { + /* due to JIT bugs clear upper 32-bits of src register + * before div/mod operation + */ + insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); + insn_buf[1] = *insn; + cnt = 2; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; diff --git a/net/core/filter.c b/net/core/filter.c index 6e0d17bc8382..e8c89d2d2bc0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -441,6 +441,10 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; + if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || + fp->code == (BPF_ALU | BPF_MOD | BPF_X)) + *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; From f531fbb06a56361d4a9807bbb16db4facc8537d3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 29 Jan 2018 02:49:01 +0100 Subject: [PATCH 214/705] bpf: reject stores into ctx via st and xadd [ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ] Alexei found that verifier does not reject stores into context via BPF_ST instead of BPF_STX. And while looking at it, we also should not allow XADD variant of BPF_STX. The context rewriter is only assuming either BPF_LDX_MEM- or BPF_STX_MEM-type operations, thus reject anything other than that so that assumptions in the rewriter properly hold. Add test cases as well for BPF selftests. Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5c56ff9eeed2..076e4a0ff95e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -702,6 +702,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); } +static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; + + return reg->type == PTR_TO_CTX; +} + static int check_ptr_alignment(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int off, int size) { @@ -896,6 +903,12 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } + if (is_ctx_reg(env, insn->dst_reg)) { + verbose("BPF_XADD stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); @@ -3012,6 +3025,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + if (is_ctx_reg(env, insn->dst_reg)) { + verbose("BPF_ST stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, From f12d0602633decf073796f3aaa59eec7ff2da9e2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 22 Jan 2018 20:11:06 +0000 Subject: [PATCH 215/705] nfsd: auth: Fix gid sorting when rootsquash enabled commit 1995266727fa8143897e89b55f5d3c79aa828420 upstream. Commit bdcf0a423ea1 ("kernel: make groups_sort calling a responsibility group_info allocators") appears to break nfsd rootsquash in a pretty major way. It adds a call to groups_sort() inside the loop that copies/squashes gids, which means the valid gids are sorted along with the following garbage. The net result is that the highest numbered valid gids are replaced with any lower-valued garbage gids, possibly including 0. We should sort only once, after filling in all the gids. Fixes: bdcf0a423ea1 ("kernel: make groups_sort calling a responsibility ...") Signed-off-by: Ben Hutchings Acked-by: J. Bruce Fields Signed-off-by: Linus Torvalds Cc: Wolfgang Walter Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/auth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 75f942ae5176..81c018e5c31e 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -59,10 +59,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; - - /* Each thread allocates its own gi, no race */ - groups_sort(gi); } + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } else { gi = get_group_info(rqgi); } From 6c6f924f9c6294944ee6efb1bbd8cdb853582e50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 31 Jan 2018 12:55:57 +0100 Subject: [PATCH 216/705] Linux 4.9.79 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8a6f158a1176..4a7e6dff1c2e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Roaring Lionus From 56bc086358cac1a2949783646eabd57447b9d672 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Jan 2018 16:26:00 -0800 Subject: [PATCH 217/705] loop: fix concurrent lo_open/lo_release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ae6650163c66a7eff1acd6eb8b0f752dcfa8eba5 upstream. 范龙飞 reports that KASAN can report a use-after-free in __lock_acquire. The reason is due to insufficient serialization in lo_release(), which will continue to use the loop device even after it has decremented the lo_refcnt to zero. In the meantime, another process can come in, open the loop device again as it is being shut down. Confusion ensues. Reported-by: 范龙飞 Signed-off-by: Linus Torvalds Signed-off-by: Jens Axboe Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 24d6cefceb32..402254d26247 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1558,9 +1558,8 @@ out: return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo = disk->private_data; int err; if (atomic_dec_return(&lo->lo_refcnt)) @@ -1586,6 +1585,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_ctl_mutex); } +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); + mutex_unlock(&loop_index_mutex); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, From 20e6f5bdf542cb2510bd54e69eed6fe395f6c026 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:10 +0300 Subject: [PATCH 218/705] net/mlx5: Define interface bits for fencing UMR wqe commit 1410a90ae449061b7e1ae19d275148f36948801b upstream. HW can implement UMR wqe re-transmission in various ways. Thus, add HCA cap to distinguish the needed fence for UMR to make sure that the wqe wouldn't fail on mkey checks. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford Cc: Marta Rybczynska Signed-off-by: Greg Kroah-Hartman --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 25ed105bbcfb..20ee90c47cd5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -737,6 +737,12 @@ enum { MLX5_CAP_PORT_TYPE_ETH = 0x1, }; +enum { + MLX5_CAP_UMR_FENCE_STRONG = 0x0, + MLX5_CAP_UMR_FENCE_SMALL = 0x1, + MLX5_CAP_UMR_FENCE_NONE = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x80]; @@ -838,7 +844,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 striding_rq[0x1]; u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0xa]; + u8 reserved_at_205[0x5]; + u8 umr_fence[0x2]; + u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; From 2a7076e71575b741f61f33f93be5ea284242a603 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:11 +0300 Subject: [PATCH 219/705] RDMA/mlx5: set UMR wqe fence according to HCA cap commit 6e8484c5cf07c7ee632587e98c1a12d319dacb7c upstream. Cache the needed umr_fence and set the wqe ctrl segmennt accordingly. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Cc: Marta Rybczynska Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +- drivers/infiniband/hw/mlx5/qp.c | 59 +++++++++++----------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a2120ff0ef4c..5e29fbd3a5a0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2575,6 +2575,18 @@ error_0: return ret; } +static u8 mlx5_get_umr_fence(u8 umr_fence_cap) +{ + switch (umr_fence_cap) { + case MLX5_CAP_UMR_FENCE_NONE: + return MLX5_FENCE_MODE_NONE; + case MLX5_CAP_UMR_FENCE_SMALL: + return MLX5_FENCE_MODE_INITIATOR_SMALL; + default: + return MLX5_FENCE_MODE_STRONG_ORDERING; + } +} + static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; @@ -3101,6 +3113,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); + if (MLX5_CAP_GEN(mdev, imaicl)) { dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 86e1e08125ff..d5cc954e8ac2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -345,7 +345,7 @@ struct mlx5_ib_qp { struct mlx5_ib_wq rq; u8 sq_signal_bits; - u8 fm_cache; + u8 next_fence; struct mlx5_ib_wq sq; /* serialize qp state modifications @@ -643,6 +643,7 @@ struct mlx5_ib_dev { struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2665414b4875..fdd156101a72 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3755,24 +3755,6 @@ static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, } } -static u8 get_fence(u8 fence, struct ib_send_wr *wr) -{ - if (unlikely(wr->opcode == IB_WR_LOCAL_INV && - wr->send_flags & IB_SEND_FENCE)) - return MLX5_FENCE_MODE_STRONG_ORDERING; - - if (unlikely(fence)) { - if (wr->send_flags & IB_SEND_FENCE) - return MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - return fence; - } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { - return MLX5_FENCE_MODE_FENCE; - } - - return 0; -} - static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, struct ib_send_wr *wr, unsigned *idx, @@ -3801,8 +3783,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u8 next_fence, - u32 mlx5_opcode) + int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -3810,7 +3791,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp, mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; - qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) ctrl->signature = wq_sig(ctrl); @@ -3870,7 +3850,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - fence = qp->fm_cache; num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); @@ -3887,6 +3866,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } + if (wr->opcode == IB_WR_LOCAL_INV || + wr->opcode == IB_WR_REG_MR) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -3913,7 +3905,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; case IB_WR_LOCAL_INV: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); set_linv_wr(qp, &seg, &size); @@ -3921,7 +3912,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_REG_MR: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size); @@ -3944,9 +3934,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error @@ -3971,9 +3960,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { @@ -3983,7 +3971,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); @@ -3993,9 +3980,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4100,8 +4087,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - get_fence(fence, wr), next_fence, + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) From efe3f94f83d2b03abf2fc071ec59bc668761bac5 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 21 Dec 2017 11:11:31 +1030 Subject: [PATCH 220/705] tools/gpio: Fix build error with musl libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1696784eb7b52b13b62d160c028ef2c2c981d4f2 upstream. The GPIO tools build fails when using a buildroot toolchain that uses musl as it's C library: arm-broomstick-linux-musleabi-gcc -Wp,-MD,./.gpio-event-mon.o.d \ -Wp,-MT,gpio-event-mon.o -O2 -Wall -g -D_GNU_SOURCE \ -Iinclude -D"BUILD_STR(s)=#s" -c -o gpio-event-mon.o gpio-event-mon.c gpio-event-mon.c:30:6: error: unknown type name ‘u_int32_t’; did you mean ‘uint32_t’? u_int32_t handleflags, ^~~~~~~~~ uint32_t The glibc headers installed on my laptop include sys/types.h in unistd.h, but it appears that musl does not. Fixes: 97f69747d8b1 ("tools/gpio: add the gpio-event-mon tool") Signed-off-by: Joel Stanley Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- tools/gpio/gpio-event-mon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1c14c2595158..4b36323ea64b 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include int monitor_device(const char *device_name, From 241c04f75e90b6e0eaf099d53c68a77ed5767835 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 12 Jan 2018 13:16:08 +0100 Subject: [PATCH 221/705] gpio: stmpe: i2c transfer are forbiden in atomic context commit b888fb6f2a278442933e3bfab70262e9a5365fb3 upstream. Move the workaround from stmpe_gpio_irq_unmask() which is executed in atomic context to stmpe_gpio_irq_sync_unlock() which is not. It fixes the following issue: [ 1.500000] BUG: scheduling while atomic: swapper/1/0x00000002 [ 1.500000] CPU: 0 PID: 1 Comm: swapper Not tainted 4.15.0-rc2-00020-gbd4301f-dirty #28 [ 1.520000] Hardware name: STM32 (Device Tree Support) [ 1.520000] [<0000bfc9>] (unwind_backtrace) from [<0000b347>] (show_stack+0xb/0xc) [ 1.530000] [<0000b347>] (show_stack) from [<0001fc49>] (__schedule_bug+0x39/0x58) [ 1.530000] [<0001fc49>] (__schedule_bug) from [<00168211>] (__schedule+0x23/0x2b2) [ 1.550000] [<00168211>] (__schedule) from [<001684f7>] (schedule+0x57/0x64) [ 1.550000] [<001684f7>] (schedule) from [<0016a513>] (schedule_timeout+0x137/0x164) [ 1.550000] [<0016a513>] (schedule_timeout) from [<00168b91>] (wait_for_common+0x8d/0xfc) [ 1.570000] [<00168b91>] (wait_for_common) from [<00139753>] (stm32f4_i2c_xfer+0xe9/0xfe) [ 1.580000] [<00139753>] (stm32f4_i2c_xfer) from [<00138545>] (__i2c_transfer+0x111/0x148) [ 1.590000] [<00138545>] (__i2c_transfer) from [<001385cf>] (i2c_transfer+0x53/0x70) [ 1.590000] [<001385cf>] (i2c_transfer) from [<001388a5>] (i2c_smbus_xfer+0x12f/0x36e) [ 1.600000] [<001388a5>] (i2c_smbus_xfer) from [<00138b49>] (i2c_smbus_read_byte_data+0x1f/0x2a) [ 1.610000] [<00138b49>] (i2c_smbus_read_byte_data) from [<00124fdd>] (__stmpe_reg_read+0xd/0x24) [ 1.620000] [<00124fdd>] (__stmpe_reg_read) from [<001252b3>] (stmpe_reg_read+0x19/0x24) [ 1.630000] [<001252b3>] (stmpe_reg_read) from [<0002c4d1>] (unmask_irq+0x17/0x22) [ 1.640000] [<0002c4d1>] (unmask_irq) from [<0002c57f>] (irq_startup+0x6f/0x78) [ 1.650000] [<0002c57f>] (irq_startup) from [<0002b7a1>] (__setup_irq+0x319/0x47c) [ 1.650000] [<0002b7a1>] (__setup_irq) from [<0002bad3>] (request_threaded_irq+0x6b/0xe8) [ 1.660000] [<0002bad3>] (request_threaded_irq) from [<0002d0b9>] (devm_request_threaded_irq+0x3b/0x6a) [ 1.670000] [<0002d0b9>] (devm_request_threaded_irq) from [<001446e7>] (mmc_gpiod_request_cd_irq+0x49/0x8a) [ 1.680000] [<001446e7>] (mmc_gpiod_request_cd_irq) from [<0013d45d>] (mmc_start_host+0x49/0x60) [ 1.690000] [<0013d45d>] (mmc_start_host) from [<0013e40b>] (mmc_add_host+0x3b/0x54) [ 1.700000] [<0013e40b>] (mmc_add_host) from [<00148119>] (mmci_probe+0x4d1/0x60c) [ 1.710000] [<00148119>] (mmci_probe) from [<000f903b>] (amba_probe+0x7b/0xbe) [ 1.720000] [<000f903b>] (amba_probe) from [<001170e5>] (driver_probe_device+0x169/0x1f8) [ 1.730000] [<001170e5>] (driver_probe_device) from [<001171b7>] (__driver_attach+0x43/0x5c) [ 1.740000] [<001171b7>] (__driver_attach) from [<0011618d>] (bus_for_each_dev+0x3d/0x46) [ 1.740000] [<0011618d>] (bus_for_each_dev) from [<001165cd>] (bus_add_driver+0xcd/0x124) [ 1.740000] [<001165cd>] (bus_add_driver) from [<00117713>] (driver_register+0x4d/0x7a) [ 1.760000] [<00117713>] (driver_register) from [<001fc765>] (do_one_initcall+0xbd/0xe8) [ 1.770000] [<001fc765>] (do_one_initcall) from [<001fc88b>] (kernel_init_freeable+0xfb/0x134) [ 1.780000] [<001fc88b>] (kernel_init_freeable) from [<00167ee3>] (kernel_init+0x7/0x9c) [ 1.790000] [<00167ee3>] (kernel_init) from [<00009b65>] (ret_from_fork+0x11/0x2c) Signed-off-by: Alexandre TORGUE Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-stmpe.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index adba614b3965..abb5a2752511 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -190,6 +190,16 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) }; int i, j; + /* + * STMPE1600: to be able to get IRQ from pins, + * a read must be done on GPMR register, or a write in + * GPSR or GPCR registers + */ + if (stmpe->partnum == STMPE1600) { + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + } + for (i = 0; i < CACHE_NR_REGS; i++) { /* STMPE801 and STMPE1600 don't have RE and FE registers */ if ((stmpe->partnum == STMPE801 || @@ -227,21 +237,11 @@ static void stmpe_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct stmpe_gpio *stmpe_gpio = gpiochip_get_data(gc); - struct stmpe *stmpe = stmpe_gpio->stmpe; int offset = d->hwirq; int regoffset = offset / 8; int mask = BIT(offset % 8); stmpe_gpio->regs[REG_IE][regoffset] |= mask; - - /* - * STMPE1600 workaround: to be able to get IRQ from pins, - * a read must be done on GPMR register, or a write in - * GPSR or GPCR registers - */ - if (stmpe->partnum == STMPE1600) - stmpe_reg_read(stmpe, - stmpe->regs[STMPE_IDX_GPMR_LSB + regoffset]); } static void stmpe_dbg_show_one(struct seq_file *s, From cc1fa4a7b653a4f0a4c95a26ce842d699e8e4b1a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 22 Jan 2018 13:19:28 +0100 Subject: [PATCH 222/705] gpio: Fix kernel stack leak to userspace commit 24bd3efc9d1efb5f756a7c6f807a36ddb6adc671 upstream. The GPIO event descriptor was leaking kernel stack to userspace because we don't zero the variable before use. Ooops. Fix this. Reported-by: Arnd Bergmann Reviewed-by: Bartosz Golaszewski Reviewed-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 063d176baa24..f3c3680963b9 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -705,6 +705,9 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) struct gpioevent_data ge; int ret, level; + /* Do not leak kernel stack to userspace */ + memset(&ge, 0, sizeof(ge)); + ge.timestamp = ktime_get_real_ns(); level = gpiod_get_value_cansleep(le->desc); From f1803207b5eab7fc9540f633b3bb73a75ad82494 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 26 Nov 2017 00:16:46 +0100 Subject: [PATCH 223/705] crypto: ecdh - fix typo in KPP dependency of CRYPTO_ECDH commit b5b9007730ce1d90deaf25d7f678511550744bdc upstream. This fixes a typo in the CRYPTO_KPP dependency of CRYPTO_ECDH. Fixes: 3c4b23901a0c ("crypto: ecdh - Add ECDH software support") Signed-off-by: Hauke Mehrtens Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 84d71482bf08..ab0d93ab5695 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -120,7 +120,7 @@ config CRYPTO_DH config CRYPTO_ECDH tristate "ECDH algorithm" - select CRYTPO_KPP + select CRYPTO_KPP help Generic implementation of the ECDH algorithm From 95259cb008ba7ec88d3869db33377a111a6d9dee Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 18 Jan 2018 20:41:09 +0100 Subject: [PATCH 224/705] crypto: aesni - handle zero length dst buffer commit 9c674e1e2f9e24fa4392167efe343749008338e0 upstream. GCM can be invoked with a zero destination buffer. This is possible if the AAD and the ciphertext have zero lengths and only the tag exists in the source buffer (i.e. a source buffer cannot be zero). In this case, the GCM cipher only performs the authentication and no decryption operation. When the destination buffer has zero length, it is possible that no page is mapped to the SG pointing to the destination. In this case, sg_page(req->dst) is an invalid access. Therefore, page accesses should only be allowed if the req->dst->length is non-zero which is the indicator that a page must exist. This fixes a crash that can be triggered by user space via AF_ALG. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index aa8b0672f87a..d9ae404f08c9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -906,7 +906,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) if (sg_is_last(req->src) && req->src->offset + req->src->length <= PAGE_SIZE && - sg_is_last(req->dst) && ++ sg_is_last(req->dst) && req->dst->length && req->dst->offset + req->dst->length <= PAGE_SIZE) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); From 1ce8e52f6f368f9d62b6320357bf7c26a1480ecb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:33 +0000 Subject: [PATCH 225/705] crypto: sha3-generic - fixes for alignment and big endian operation commit c013cee99d5a18aec8c71fee8f5f41369cd12595 upstream. Ensure that the input is byte swabbed before injecting it into the SHA3 transform. Use the get_unaligned() accessor for this so that we don't perform unaligned access inadvertently on architectures that do not support that. Fixes: 53964b9ee63b7075 ("crypto: sha3 - Add SHA-3 hash algorithm") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/sha3_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 7e8ed96236ce..a68be626017c 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #define KECCAK_ROUNDS 24 @@ -149,7 +150,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int i; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) src)[i]; + sctx->st[i] ^= get_unaligned_le64(src + 8 * i); keccakf(sctx->st); done += sctx->rsiz; @@ -174,7 +175,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) sctx->buf[sctx->rsiz - 1] |= 0x80; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i); keccakf(sctx->st); From b7edc45f3adace20d74788dd4d5a085c134ba6be Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 2 Jan 2018 08:55:25 +0100 Subject: [PATCH 226/705] crypto: af_alg - whitelist mask and type commit bb30b8848c85e18ca7e371d0a869e94b3e383bdf upstream. The user space interface allows specifying the type and mask field used to allocate the cipher. Only a subset of the possible flags are intended for user space. Therefore, white-list the allowed flags. In case the user space caller uses at least one non-allowed flag, EINVAL is returned. Reported-by: syzbot Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f5e18c2a4852..ca50eeb13097 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(af_alg_release_parent); static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - const u32 forbidden = CRYPTO_ALG_INTERNAL; + const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct sockaddr_alg *sa = (void *)uaddr; @@ -157,6 +157,10 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) void *private; int err; + /* If caller uses non-allowed flag, return error. */ + if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed)) + return -EINVAL; + if (sock->state == SS_CONNECTED) return -EINVAL; @@ -175,9 +179,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (IS_ERR(type)) return PTR_ERR(type); - private = type->bind(sa->salg_name, - sa->salg_feat & ~forbidden, - sa->salg_mask & ~forbidden); + private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask); if (IS_ERR(private)) { module_put(type->owner); return PTR_ERR(private); From ddba3c67a5b81955e34563d225a3f117354a247e Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Thu, 7 Dec 2017 12:31:56 -0800 Subject: [PATCH 227/705] HID: wacom: EKR: ensure devres groups at higher indexes are released commit 791ae273731fa85d3332e45064dab177ae663e80 upstream. Background: ExpressKey Remotes communicate their events via usb dongle. Each dongle can hold up to 5 pairings at one time and one EKR (identified by its serial number) can unfortunately be paired with its dongle more than once. The pairing takes place in a round-robin fashion. Input devices are only created once per EKR, when a new serial number is seen in the list of pairings. However, if a device is created for a "higher" paring index and subsequently a second pairing occurs at a lower pairing index, unpairing the remote with that serial number from any pairing index will currently cause a driver crash. This occurs infrequently, as two remotes are necessary to trigger this bug and most users have only one remote. As an illustration, to trigger the bug you need to have two remotes, and pair them in this order: 1. slot 0 -> remote 1 (input device created for remote 1) 2. slot 1 -> remote 1 (duplicate pairing - no device created) 3. slot 2 -> remote 1 (duplicate pairing - no device created) 4. slot 3 -> remote 1 (duplicate pairing - no device created) 5. slot 4 -> remote 2 (input device created for remote 2) 6. slot 0 -> remote 2 (1 destroyed and recreated at slot 1) 7. slot 1 -> remote 2 (1 destroyed and recreated at slot 2) 8. slot 2 -> remote 2 (1 destroyed and recreated at slot 3) 9. slot 3 -> remote 2 (1 destroyed and not recreated) 10. slot 4 -> remote 2 (2 was already in this slot so no changes) 11. slot 0 -> remote 1 (The current code sees remote 2 was paired over in one of the dongle slots it occupied and attempts to remove all information about remote 2 [1]. It calls wacom_remote_destroy_one for remote 2, but the destroy function assumes the lowest index is where the remote's input device was created. The code "cleans up" the other remote 2 pairings including the one which the input device was based on, assuming they were were just duplicate pairings. However, the cleanup doesn't call the devres release function for the input device that was created in slot 4). This issue is fixed by this commit. [1] Remote 2 should subsequently be re-created on the next packet from the EKR at the lowest numbered slot that it occupies (here slot 1). Fixes: f9036bd43602 ("HID: wacom: EKR: use devres groups to manage resources") Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_sys.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index d72dfb2bbdb8..7a4d39ce51d9 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2192,23 +2192,23 @@ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index) int i; unsigned long flags; - spin_lock_irqsave(&remote->remote_lock, flags); - remote->remotes[index].registered = false; - spin_unlock_irqrestore(&remote->remote_lock, flags); - - if (remote->remotes[index].battery.battery) - devres_release_group(&wacom->hdev->dev, - &remote->remotes[index].battery.bat_desc); - - if (remote->remotes[index].group.name) - devres_release_group(&wacom->hdev->dev, - &remote->remotes[index]); - for (i = 0; i < WACOM_MAX_REMOTES; i++) { if (remote->remotes[i].serial == serial) { + + spin_lock_irqsave(&remote->remote_lock, flags); + remote->remotes[i].registered = false; + spin_unlock_irqrestore(&remote->remote_lock, flags); + + if (remote->remotes[i].battery.battery) + devres_release_group(&wacom->hdev->dev, + &remote->remotes[i].battery.bat_desc); + + if (remote->remotes[i].group.name) + devres_release_group(&wacom->hdev->dev, + &remote->remotes[i]); + remote->remotes[i].serial = 0; remote->remotes[i].group.name = NULL; - remote->remotes[i].registered = false; remote->remotes[i].battery.battery = NULL; wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN; } From 517931760e696c779e4d24424633c334a6447450 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:58:27 -0800 Subject: [PATCH 228/705] power: reset: zx-reboot: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 348c7cf5fcbcb68838255759d4cb45d039af36d2 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/power/reset/zx-reboot.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/zx-reboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/power/reset/zx-reboot.c b/drivers/power/reset/zx-reboot.c index b0b1eb3a78c2..76153ac0706c 100644 --- a/drivers/power/reset/zx-reboot.c +++ b/drivers/power/reset/zx-reboot.c @@ -81,3 +81,7 @@ static struct platform_driver zx_reboot_driver = { }, }; module_platform_driver(zx_reboot_driver); + +MODULE_DESCRIPTION("ZTE SoCs reset driver"); +MODULE_AUTHOR("Jun Nie "); +MODULE_LICENSE("GPL v2"); From cb1a0b51d1031ce335d16d7528cace677f105106 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:54:52 -0800 Subject: [PATCH 229/705] gpio: iop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 97b03136e1b637d7a9d2274c099e44ecf23f1103 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-iop.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-iop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-iop.c b/drivers/gpio/gpio-iop.c index 98c7ff2a76e7..8d62db447ec1 100644 --- a/drivers/gpio/gpio-iop.c +++ b/drivers/gpio/gpio-iop.c @@ -58,3 +58,7 @@ static int __init iop3xx_gpio_init(void) return platform_driver_register(&iop3xx_gpio_driver); } arch_initcall(iop3xx_gpio_init); + +MODULE_DESCRIPTION("GPIO handling for Intel IOP3xx processors"); +MODULE_AUTHOR("Lennert Buytenhek "); +MODULE_LICENSE("GPL"); From e29997d55268c9d4d4fc2bb7833f83dc5b220cd5 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:54:26 -0800 Subject: [PATCH 230/705] gpio: ath79: add missing MODULE_DESCRIPTION/LICENSE commit 539340f37e6d6ed4cd93e8e18c9b2e4eafd4b842 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-ath79.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION is also added. Signed-off-by: Jesse Chan Acked-by: Alban Bedel Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ath79.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c index dc37dbe4b46d..a83e97e15c14 100644 --- a/drivers/gpio/gpio-ath79.c +++ b/drivers/gpio/gpio-ath79.c @@ -323,3 +323,6 @@ static struct platform_driver ath79_gpio_driver = { }; module_platform_driver(ath79_gpio_driver); + +MODULE_DESCRIPTION("Atheros AR71XX/AR724X/AR913X GPIO API support"); +MODULE_LICENSE("GPL v2"); From 3a98d0753928cb8260930f2b651ecef47a0c3037 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:57:13 -0800 Subject: [PATCH 231/705] mtd: nand: denali_pci: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit d822401d1c6898a4a4ee03977b78b8cec402e88a upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/mtd/nand/denali_pci.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Acked-by: Masahiro Yamada Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/denali_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c index de31514df282..d38527e0a2f2 100644 --- a/drivers/mtd/nand/denali_pci.c +++ b/drivers/mtd/nand/denali_pci.c @@ -119,3 +119,7 @@ static struct pci_driver denali_pci_driver = { }; module_pci_driver(denali_pci_driver); + +MODULE_DESCRIPTION("PCI driver for Denali NAND controller"); +MODULE_AUTHOR("Intel Corporation and its suppliers"); +MODULE_LICENSE("GPL v2"); From 30942f91b5a57a1ca7557998e901367901d1bccf Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 12 Dec 2017 14:31:30 -0500 Subject: [PATCH 232/705] igb: Free IRQs when device is hotplugged commit 888f22931478a05bc81ceb7295c626e1292bf0ed upstream. Recently I got a Caldigit TS3 Thunderbolt 3 dock, and noticed that upon hotplugging my kernel would immediately crash due to igb: [ 680.825801] kernel BUG at drivers/pci/msi.c:352! [ 680.828388] invalid opcode: 0000 [#1] SMP [ 680.829194] Modules linked in: igb(O) thunderbolt i2c_algo_bit joydev vfat fat btusb btrtl btbcm btintel bluetooth ecdh_generic hp_wmi sparse_keymap rfkill wmi_bmof iTCO_wdt intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul snd_pcm rtsx_pci_ms mei_me snd_timer memstick snd pcspkr mei soundcore i2c_i801 tpm_tis psmouse shpchp wmi tpm_tis_core tpm video hp_wireless acpi_pad rtsx_pci_sdmmc mmc_core crc32c_intel serio_raw rtsx_pci mfd_core xhci_pci xhci_hcd i2c_hid i2c_core [last unloaded: igb] [ 680.831085] CPU: 1 PID: 78 Comm: kworker/u16:1 Tainted: G O 4.15.0-rc3Lyude-Test+ #6 [ 680.831596] Hardware name: HP HP ZBook Studio G4/826B, BIOS P71 Ver. 01.03 06/09/2017 [ 680.832168] Workqueue: kacpi_hotplug acpi_hotplug_work_fn [ 680.832687] RIP: 0010:free_msi_irqs+0x180/0x1b0 [ 680.833271] RSP: 0018:ffffc9000030fbf0 EFLAGS: 00010286 [ 680.833761] RAX: ffff8803405f9c00 RBX: ffff88033e3d2e40 RCX: 000000000000002c [ 680.834278] RDX: 0000000000000000 RSI: 00000000000000ac RDI: ffff880340be2178 [ 680.834832] RBP: 0000000000000000 R08: ffff880340be1ff0 R09: ffff8803405f9c00 [ 680.835342] R10: 0000000000000000 R11: 0000000000000040 R12: ffff88033d63a298 [ 680.835822] R13: ffff88033d63a000 R14: 0000000000000060 R15: ffff880341959000 [ 680.836332] FS: 0000000000000000(0000) GS:ffff88034f440000(0000) knlGS:0000000000000000 [ 680.836817] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 680.837360] CR2: 000055e64044afdf CR3: 0000000001c09002 CR4: 00000000003606e0 [ 680.837954] Call Trace: [ 680.838853] pci_disable_msix+0xce/0xf0 [ 680.839616] igb_reset_interrupt_capability+0x5d/0x60 [igb] [ 680.840278] igb_remove+0x9d/0x110 [igb] [ 680.840764] pci_device_remove+0x36/0xb0 [ 680.841279] device_release_driver_internal+0x157/0x220 [ 680.841739] pci_stop_bus_device+0x7d/0xa0 [ 680.842255] pci_stop_bus_device+0x2b/0xa0 [ 680.842722] pci_stop_bus_device+0x3d/0xa0 [ 680.843189] pci_stop_and_remove_bus_device+0xe/0x20 [ 680.843627] trim_stale_devices+0xf3/0x140 [ 680.844086] trim_stale_devices+0x94/0x140 [ 680.844532] trim_stale_devices+0xa6/0x140 [ 680.845031] ? get_slot_status+0x90/0xc0 [ 680.845536] acpiphp_check_bridge.part.5+0xfe/0x140 [ 680.846021] acpiphp_hotplug_notify+0x175/0x200 [ 680.846581] ? free_bridge+0x100/0x100 [ 680.847113] acpi_device_hotplug+0x8a/0x490 [ 680.847535] acpi_hotplug_work_fn+0x1a/0x30 [ 680.848076] process_one_work+0x182/0x3a0 [ 680.848543] worker_thread+0x2e/0x380 [ 680.848963] ? process_one_work+0x3a0/0x3a0 [ 680.849373] kthread+0x111/0x130 [ 680.849776] ? kthread_create_worker_on_cpu+0x50/0x50 [ 680.850188] ret_from_fork+0x1f/0x30 [ 680.850601] Code: 43 14 85 c0 0f 84 d5 fe ff ff 31 ed eb 0f 83 c5 01 39 6b 14 0f 86 c5 fe ff ff 8b 7b 10 01 ef e8 b7 e4 d2 ff 48 83 78 70 00 74 e3 <0f> 0b 49 8d b5 a0 00 00 00 e8 62 6f d3 ff e9 c7 fe ff ff 48 8b [ 680.851497] RIP: free_msi_irqs+0x180/0x1b0 RSP: ffffc9000030fbf0 As it turns out, normally the freeing of IRQs that would fix this is called inside of the scope of __igb_close(). However, since the device is already gone by the point we try to unregister the netdevice from the driver due to a hotplug we end up seeing that the netif isn't present and thus, forget to free any of the device IRQs. So: make sure that if we're in the process of dismantling the netdev, we always allow __igb_close() to be called so that IRQs may be freed normally. Additionally, only allow igb_close() to be called from __igb_close() if it hasn't already been called for the given adapter. Signed-off-by: Lyude Paul Fixes: 9474933caf21 ("igb: close/suspend race in netif_device_detach") Cc: Todd Fujinaka Cc: Stephen Hemminger Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ca54f7684668..3a61491421b1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3273,7 +3273,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) int igb_close(struct net_device *netdev) { - if (netif_device_present(netdev)) + if (netif_device_present(netdev) || netdev->dismantle) return __igb_close(netdev, false); return 0; } From 036c227cdd1ca8e5e7e59e18ac87de3003f90d23 Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 10 Nov 2017 02:05:06 +0100 Subject: [PATCH 233/705] drm/vc4: Account for interrupts in flight [ Upstream commit 253696ccd613fbdaa5aba1de44c461a058e0a114 ] Synchronously disable the IRQ to make the following cancel_work_sync invocation effective. An interrupt in flight could enqueue further overflow mem work. As we free the binner BO immediately following vc4_irq_uninstall this caused a NULL pointer dereference in the work callback vc4_overflow_mem_work. Link: https://github.com/anholt/linux/issues/114 Signed-off-by: Stefan Schake Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1510275907-993-2-git-send-email-stschake@gmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 094bc6a475c1..d45a7c0a7915 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,6 +208,9 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + /* Undo the effects of a previous vc4_irq_uninstall. */ + enable_irq(dev->irq); + /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); @@ -225,6 +228,9 @@ vc4_irq_uninstall(struct drm_device *dev) /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + /* Finish any interrupt handler still in flight. */ + disable_irq(dev->irq); + cancel_work_sync(&vc4->overflow_mem_work); } From 82e57cdce058713d3546f525140599c3b9b339fb Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Tue, 7 Nov 2017 15:17:55 +0530 Subject: [PATCH 234/705] cpupowerutils: bench - Fix cpu online check [ Upstream commit 53d1cd6b125fb9d69303516a1179ebc3b72f797a ] cpupower_is_cpu_online was incorrectly checking for 0. This patch fixes this by checking for 1 when the cpu is online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/bench/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51ba..2bb3eef7d5c1 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; From d8f75b4c7f259307ab9d0f4f304476e2b701c6ef Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 15 Nov 2017 14:10:02 +0530 Subject: [PATCH 235/705] cpupower : Fix cpupower working when cpu0 is offline [ Upstream commit dbdc468f35ee827cab2753caa1c660bdb832243a ] cpuidle_monitor used to assume that cpu0 is always online which is not a valid assumption on POWER machines. This patch fixes this by getting the cpu on which the current thread is running, instead of always using cpu0 for monitoring which may not be online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf..5b3205f16217 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); From 60d9b22b1ffc09a28be556e31caf7cdffe0df7ed Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:33 +0200 Subject: [PATCH 236/705] KVM: x86: emulator: Return to user-mode on L1 CPL=0 emulation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1f4dcb3b213235e642088709a1c54964d23365e9 ] On this case, handle_emulation_failure() fills kvm_run with internal-error information which it expects to be delivered to user-mode for further processing. However, the code reports a wrong return-value which makes KVM to never return to user-mode on this scenario. Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to userspace") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d3f80cccb9aa..6c11a98860d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5308,7 +5308,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); From 114de9bfefa5aee862815b0ae7453d40bf5278c2 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:34 +0200 Subject: [PATCH 237/705] KVM: x86: Don't re-execute instruction when not passing CR2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b8ae63798cb97e785a667ff27e43fa6220cb734 ] In case of instruction-decode failure or emulation failure, x86_emulate_instruction() will call reexecute_instruction() which will attempt to use the cr2 value passed to x86_emulate_instruction(). However, when x86_emulate_instruction() is called from emulate_instruction(), cr2 is not passed (passed as 0) and therefore it doesn't make sense to execute reexecute_instruction() logic at all. Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cbd1d44da2d3..20cfeeb681c6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1113,7 +1113,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3ca6d15994e4..b9673bc82caa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6257,7 +6257,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; From ec73d16bc7ce352375052d81a22bbb53e3eccabe Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:47 -0800 Subject: [PATCH 238/705] KVM: X86: Fix operand/address-size during instruction decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3853be2603191829b442b64dac6ae8ba0c027bf9 ] Pedro reported: During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" instruction under KVM produces different results on both memory and the SP register depending on whether EPT support is enabled. With EPT the SP is reduced by 4 bytes (and the written value is 0-padded) but without EPT support it is only reduced by 2 bytes. The difference can be observed when the CS.DB field is 1 (32-bit) but not when it's 0 (16-bit). The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D also should be respected instead of just default operand/address-size/66H prefix/67H prefix during instruction decoding. This patch fixes it by also adjusting operand/address-size according to CS.D. Reported-by: Pedro Fonseca Tested-by: Pedro Fonseca Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Pedro Fonseca Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c8f8dd8ca0a1..6f5a3b076341 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4990,6 +4990,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5008,6 +5010,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; From 2f9e94ef498db9e7f4d920d9dcd042fa378e3f03 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:29 +0200 Subject: [PATCH 239/705] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 ] KVM uses ioapic_handled_vectors to track vectors that need to notify the IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an interrupt with old configuration is pending or running and ioapic_handled_vectors only remembers the newest configuration; thus EOI from the old interrupt is not delievered to the IOAPIC. A previous commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") addressed this issue by adding pending edge-triggered interrupts to ioapic_handled_vectors, fixing this race for edge-triggered interrupts. The commit explicitly ignored level-triggered interrupts, but this race applies to them as well: 1) IOAPIC sends a level triggered interrupt vector to VCPU0 2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC to route the vector to VCPU1. The reconfiguration rewrites only the upper 32 bits of the IOREDTBLn register. (Causes KVM to update ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) 3) VCPU0 sends EOI for the vector, but it's not delievered to the IOAPIC because the ioapic_handled_vectors doesn't include the vector. 4) New interrupts are not delievered to VCPU1 because remote_irr bit is set forever. Therefore, the correct behavior is to add all pending and running interrupts to ioapic_handled_vectors. This commit introduces a slight performance hit similar to commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") for the rare case that the vector is reused by a non-IOAPIC source on VCPU0. We prefer to keep solution simple and not handle this case just as the original commit does. Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d2..a7ac8688bba8 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } From a9f2c169366752927fd0bf8f5a9b78d3315f6c85 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:32 +0200 Subject: [PATCH 240/705] KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8bfec2930525808c01f038825d1df3904638631 ] Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for IOAPICs without an EOI register. They simulate the EOI message manually by changing the trigger mode to edge and then back to level, with the entry being masked during this. QEMU implements this feature in commit ed1263c363c9 ("ioapic: clear remote irr bit for edge-triggered interrupts") As a side effect, this commit removes an incorrect behavior where Remote IRR was cleared when the redirection table entry was rewritten. This is not consistent with the manual and also opens an opportunity for a strange behavior when a redirection table entry is modified from an interrupt handler that handles the same entry: The modification will clear the Remote IRR bit even though the interrupt handler is still running. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index a7ac8688bba8..4b573c8694ac 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -306,8 +306,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); From 1d3ab3b2964e927515b947c8b112a9a40ed789c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 15 Oct 2017 21:24:49 +0200 Subject: [PATCH 241/705] ACPI / bus: Leave modalias empty for devices which are not present [ Upstream commit 10809bb976648ac58194a629e3d7af99e7400297 ] Most Bay and Cherry Trail devices use a generic DSDT with all possible peripheral devices present in the DSDT, with their _STA returning 0x00 or 0x0f based on AML variables which describe what is actually present on the board. Since ACPI device objects with a 0x00 status (not present) still get an entry under /sys/bus/acpi/devices, and those entry had an acpi:PNPID modalias, userspace would end up loading modules for non present hardware. This commit fixes this by leaving the modalias empty for non present devices. This results in 10 modules less being loaded with a generic distro kernel config on my Cherry Trail test-device (a GPD pocket). Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 7b2c48fde4e2..201c7ceb7052 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the From 876b31fd9815c57ae0389132f3306ff31f148092 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:33 +0200 Subject: [PATCH 242/705] KVM: x86: ioapic: Preserve read-only values in the redirection table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b200dded0a6974a3b69599832b2203483920ab25 ] According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb ("ioapic: keep RO bits for IOAPIC entry"). Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 4b573c8694ac..5f810bb80802 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -278,6 +278,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -300,6 +301,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -307,6 +311,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits &= ~0xffffffffULL; e->bits |= (u32) val; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; /* * Some OSes (Linux, Xen) assume that Remote IRR bit will From 6436981ba6d11dc13b242d48855944b73569739f Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 15 Nov 2017 21:17:55 +0000 Subject: [PATCH 243/705] cpufreq: Add Loongson machine dependencies [ Upstream commit 0d307935fefa6389eb726c6362351c162c949101 ] The MIPS loongson cpufreq drivers don't build unless configured for the correct machine type, due to dependency on machine specific architecture headers and symbols in machine specific platform code. More specifically loongson1-cpufreq.c uses RST_CPU_EN and RST_CPU, neither of which is defined in asm/mach-loongson32/regs-clk.h unless CONFIG_LOONGSON1_LS1B=y, and loongson2_cpufreq.c references loongson2_clockmod_table[], which is only defined in arch/mips/loongson64/lemote-2f/clock.c, i.e. when CONFIG_LEMOTE_MACH2F=y. Add these dependencies to Kconfig to avoid randconfig / allyesconfig build failures (e.g. when based on BMIPS which also has a cpufreq driver). Signed-off-by: James Hogan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index d8b164a7c4e5..cac26fb22891 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -273,6 +273,7 @@ endif if MIPS config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -285,6 +286,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. From 409982cbb5eb814532953fb996ddc27bae5282a4 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Fri, 24 Nov 2017 15:14:27 -0800 Subject: [PATCH 244/705] bcache: check return value of register_shrinker [ Upstream commit 6c4ca1e36cdc1a0a7a84797804b87920ccbebf51 ] register_shrinker is now __must_check, so check it to kill a warning. Caller of bch_btree_cache_alloc in super.c appropriately checks return value so this is fully plumbed through. This V2 fixes checkpatch warnings and improves the commit description, as I was too hasty getting the previous version out. Signed-off-by: Michael Lyle Reviewed-by: Vojtech Pavlik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 2efdce07247c..cac297f8170e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -803,7 +803,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } From 8afdbb165a792f1e89b0a2c67b54826889d3d51f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:55 -0400 Subject: [PATCH 245/705] drm/amdgpu: Fix SDMA load/unload sequence on HWS disabled mode [ Upstream commit cf21654b40968609779751b34e7923180968fe5b ] Fix the SDMA load and unload sequence as suggested by HW document. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1a0a5f7cccbc..47951f4775b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -367,29 +367,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, - m->sdma_rlc_rb_base_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, - m->sdma_rlc_rb_rptr_addr_lo); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, - m->sdma_rlc_rb_rptr_addr_hi); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -493,9 +514,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } From 16980affa1b09fc536657ddb933902825ead1c38 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 1 Nov 2017 19:21:56 -0400 Subject: [PATCH 246/705] drm/amdkfd: Fix SDMA ring buffer size calculation [ Upstream commit d12fb13f23199faa7e536acec1db49068e5a067d ] ffs function return the position of the first bit set on 1 based. (bit zero returns 1). Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index d83de985e88c..8577a563600f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -215,8 +215,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, BUG_ON(!mm || !mqd || !q); m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; From 8275584082063726845b7ce2acd1b9d87cdee3fa Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:57 -0400 Subject: [PATCH 247/705] drm/amdkfd: Fix SDMA oversubsription handling [ Upstream commit 8c946b8988acec785bcf67088b6bd0747f36d2d3 ] SDMA only supports a fixed number of queues. HWS cannot handle oversubscription. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index e1fb40b84c72..5425c68d0287 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -205,6 +205,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && From 03899a46c223510160632e991bad3ad234da386d Mon Sep 17 00:00:00 2001 From: zhangliping Date: Sat, 25 Nov 2017 22:02:12 +0800 Subject: [PATCH 248/705] openvswitch: fix the incorrect flow action alloc size [ Upstream commit 67c8d22a73128ff910e2287567132530abcf5b71 ] If we want to add a datapath flow, which has more than 500 vxlan outputs' action, we will get the following error reports: openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Actions may not be safe on all matching packets ... ... It seems that we can simply enlarge the MAX_ACTIONS_BUFSIZE to fix it, but this is not the root cause. For example, for a vxlan output action, we need about 60 bytes for the nlattr, but after it is converted to the flow action, it only occupies 24 bytes. This means that we can still support more than 1000 vxlan output actions for a single datapath flow under the the current 32k max limitation. So even if the nla_len(attr) is larger than MAX_ACTIONS_BUFSIZE, we shouldn't report EINVAL and keep it move on, as the judgement can be done by the reserve_sfa_size. Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 07925418c2a5..1668916bdbde 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1789,14 +1789,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); - return ERR_PTR(-EINVAL); - } + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -1869,12 +1866,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + OVS_NLERR(log, "Flow action size exceeds max %u", + MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); + } new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size, log); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -2500,7 +2500,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr), log); + *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); From 030d4676a2681a0656c205a9e58b9624a74a16f4 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 14 Nov 2017 23:20:05 +0800 Subject: [PATCH 249/705] mac80211: fix the update of path metric for RANN frame [ Upstream commit fbbdad5edf0bb59786a51b94a9d006bc8c2da9a2 ] The previous path metric update from RANN frame has not considered the own link metric toward the transmitting mesh STA. Fix this. Reported-by: Michael65535 Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_hwmp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index b747c9645e43..fed598a202c8 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -788,7 +788,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; u8 ttl, flags, hopcount; const u8 *orig_addr; - u32 orig_sn, metric, metric_txsta, interval; + u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; bool root_is_gate; ttl = rann->rann_ttl; @@ -799,7 +799,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, interval = le32_to_cpu(rann->rann_interval); hopcount = rann->rann_hopcount; hopcount++; - metric = le32_to_cpu(rann->rann_metric); + orig_metric = le32_to_cpu(rann->rann_metric); /* Ignore our own RANNs */ if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -816,7 +816,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, return; } - metric_txsta = airtime_link_metric_get(local, sta); + last_hop_metric = airtime_link_metric_get(local, sta); + new_metric = orig_metric + last_hop_metric; + if (new_metric < orig_metric) + new_metric = MAX_METRIC; mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { @@ -829,7 +832,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if (!(SN_LT(mpath->sn, orig_sn)) && - !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { + !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { rcu_read_unlock(); return; } @@ -847,7 +850,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } mpath->sn = orig_sn; - mpath->rann_metric = metric + metric_txsta; + mpath->rann_metric = new_metric; mpath->is_root = true; /* Recording RANNs sender address to send individually * addressed PREQs destined for root mesh STA */ @@ -867,7 +870,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, orig_sn, 0, NULL, 0, broadcast_addr, hopcount, ttl, interval, - metric + metric_txsta, 0, sdata); + new_metric, 0, sdata); } rcu_read_unlock(); From 8cfb3965ebcd9e02365f8c8e9f4c9f19fd0be004 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Nov 2017 16:20:52 -0500 Subject: [PATCH 250/705] btrfs: fix deadlock when writing out space cache [ Upstream commit b77000ed558daa3bef0899d29bf171b8c9b5e6a8 ] If we fail to prepare our pages for whatever reason (out of memory in our case) we need to make sure to drop the block_group->data_rwsem, otherwise hilarity ensues. Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba [ add label and use existing unlocking code ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e4b48f377d3a..c56253a1e5b4 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1253,7 +1253,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Lock all pages first so we can lock the extent safely. */ ret = io_ctl_prepare_pages(io_ctl, inode, 0); if (ret) - goto out; + goto out_unlock; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state); @@ -1346,6 +1346,7 @@ out_nospc_locked: out_nospc: cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list); +out_unlock: if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); From 90ef2c30ebd358005d892aa2895bdcbc027ddce1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:20:15 -0400 Subject: [PATCH 251/705] reiserfs: remove unneeded i_version bump [ Upstream commit 9f97df50c52c2887432debb6238f4e43567386a5 ] The i_version field in reiserfs is not initialized and is only ever updated here. Nothing ever views it, so just remove it. Signed-off-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0a6ad4e71e88..e101d70d2327 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2521,7 +2521,6 @@ out: return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; From b0fa04e8429ed74b6156b8fbf329a022e701072e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:55:05 -0800 Subject: [PATCH 252/705] KVM: X86: Fix softlockup when get the current kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e70b57a6ce4e8b92a56a615ae79bdb2bd66035e7 ] watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [qemu-system-x86:10185] CPU: 6 PID: 10185 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc4+ #4 RIP: 0010:kvm_get_time_scale+0x4e/0xa0 [kvm] Call Trace: get_time_ref_counter+0x5a/0x80 [kvm] kvm_hv_process_stimers+0x120/0x5f0 [kvm] kvm_arch_vcpu_ioctl_run+0x4b4/0x1690 [kvm] kvm_vcpu_ioctl+0x33a/0x620 [kvm] do_vfs_ioctl+0xa1/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1e/0xa9 This can be reproduced when running kvm-unit-tests/hyperv_stimer.flat and cpu-hotplug stress simultaneously. __this_cpu_read(cpu_tsc_khz) returns 0 (set in kvmclock_cpu_down_prep()) when the pCPU is unhotplug which results in kvm_get_time_scale() gets into an infinite loop. This patch fixes it by treating the unhotplug pCPU as not using master clock. Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c11a98860d1..e023ef981feb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1751,10 +1751,13 @@ static u64 __get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); From 5c0b19bd8cffec6822c92af223814712bdde42bd Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:52:21 -0800 Subject: [PATCH 253/705] KVM: VMX: Fix rflags cache during vCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c37c28730bb031cc8a44a130c2555c0f3efbe2d0 ] Reported by syzkaller: *** Guest State *** CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 CR3 = 0x000000002081e000 RSP = 0x000000000000fffa RIP = 0x0000000000000000 RFLAGS=0x00023000 DR7 = 0x00000000000000 ^^^^^^^^^^ ------------[ cut here ]------------ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 Call Trace: kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The failed vmentry is triggered by the following beautified testcase: #include #include #include #include #include #include #include long r[5]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_guest_debug debug = { .control = 0xf0403, .arch = { .debugreg[6] = 0x2, .debugreg[7] = 0x2 } }; ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); ioctl(r[4], KVM_RUN, 0); } which testcase tries to setup the processor specific debug registers and configure vCPU for handling guest debug events through KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set rflags in order to set TF bit if single step is needed. All regs' caches are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU reset. However, the cache of rflags is not reset during vCPU reset. The function vmx_get_rflags() returns an unreset rflags cache value since the cache is marked avail, it is 0 after boot. Vmentry fails if the rflags reserved bit 1 is 0. This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and its cache to 0x2 during vCPU reset. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b9673bc82caa..178a344f55f8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5194,7 +5194,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); From b5bfda0f8e29c7070f19c0cac4005fd85f37a84e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Nov 2017 12:21:07 -0800 Subject: [PATCH 254/705] xfs: always free inline data before resetting inode fork during ifree [ Upstream commit 98c4f78dcdd8cec112d1cbc5e9a792ee6e5ab7a6 ] In xfs_ifree, we reset the data/attr forks to extents format without bothering to free any inline data buffer that might still be around after all the blocks have been truncated off the file. Prior to commit 43518812d2 ("xfs: remove support for inlining data/extents into the inode fork") nobody noticed because the leftover inline data after truncation was small enough to fit inside the inline buffer inside the fork itself. However, now that we've removed the inline buffer, we /always/ have to free the inline data buffer or else we leak them like crazy. This test was found by turning on kmemleak for generic/001 or generic/388. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 98ca9f1b6a07..c5f2f1e3cc4b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2429,6 +2429,24 @@ retry: return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have @@ -2466,6 +2484,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; From c6a34556f539c69366bd7bcbeff490a6949f95ce Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Thu, 23 Nov 2017 15:18:35 +0100 Subject: [PATCH 255/705] xen-netfront: remove warning when unloading module [ Upstream commit 5b5971df3bc2775107ddad164018a8a8db633b81 ] v2: * Replace busy wait with wait_event()/wake_up_all() * Cannot garantee that at the time xennet_remove is called, the xen_netback state will not be XenbusStateClosed, so added a condition for that * There's a small chance for the xen_netback state is XenbusStateUnknown by the time the xen_netfront switches to Closed, so added a condition for that. When unloading module xen_netfront from guest, dmesg would output warning messages like below: [ 105.236836] xen:grant_table: WARNING: g.e. 0x903 still in use! [ 105.236839] deferring g.e. 0x903 (pfn 0x35805) This problem relies on netfront and netback being out of sync. By the time netfront revokes the g.e.'s netback didn't have enough time to free all of them, hence displaying the warnings on dmesg. The trick here is to make netfront to wait until netback frees all the g.e.'s and only then continue to cleanup for the module removal, and this is done by manipulating both device states. Signed-off-by: Eduardo Otubo Acked-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8d498a997e25..1a9dadf7b3cc 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -86,6 +86,8 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); + struct netfront_stats { u64 packets; u64 bytes; @@ -2051,10 +2053,12 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: + wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: + wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2160,6 +2164,20 @@ static int xennet_remove(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + xenbus_switch_state(dev, XenbusStateClosing); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing); + + xenbus_switch_state(dev, XenbusStateClosed); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); + } + xennet_disconnect_backend(info); unregister_netdev(info->netdev); From c57767b60962f4c965064aadc028db55bd0e63fb Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 10 Aug 2017 10:53:53 +0200 Subject: [PATCH 256/705] auxdisplay: img-ascii-lcd: Only build on archs that have IOMEM [ Upstream commit 141cbfba1d0502006463aa80f57c64086226af1a ] This avoids the MODPOST error: ERROR: "devm_ioremap_resource" [drivers/auxdisplay/img-ascii-lcd.ko] undefined! Signed-off-by: Thomas Meyer Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 10e1b9eee10e..f03cf1df8d6b 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -121,6 +121,7 @@ config CFAG12864B_RATE config IMG_ASCII_LCD tristate "Imagination Technologies ASCII LCD Display" + depends on HAS_IOMEM default y if MIPS_MALTA || MIPS_SEAD3 select SYSCON help From 5cd3586ca8d4347d2cd658062c994b9edd09ab59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:12 -0400 Subject: [PATCH 257/705] nfsd: CLOSE SHOULD return the invalid special stateid for NFSv4.x (x>0) [ Upstream commit fb500a7cfee7f2f447d2bbf30cb59629feab6ac1 ] Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ebb2d7c8182..7f3a11ab4191 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,6 +63,9 @@ static const stateid_t zero_stateid = { static const stateid_t currentstateid = { .si_generation = 1, }; +static const stateid_t close_stateid = { + .si_generation = 0xffffffffU, +}; static u64 current_sessionid = 1; @@ -5407,6 +5410,11 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + /* See RFC5661 sectionm 18.2.4 */ + if (stp->st_stid.sc_client->cl_minorversion) + memcpy(&close->cl_stateid, &close_stateid, + sizeof(close->cl_stateid)); + /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: From 2a7d4a723d2e04f9d155d2dcf82ed64d3acc7b10 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:15 -0400 Subject: [PATCH 258/705] nfsd: Ensure we check stateid validity in the seqid operation checks [ Upstream commit 9271d7e509c1bfc0b9a418caec29ec8d1ac38270 ] After taking the stateid st_mutex, we want to know that the stateid still represents valid state before performing any non-idempotent actions. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7f3a11ab4191..799c18cba6a5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5178,15 +5178,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID - || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) - /* - * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step, and - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; - mutex_lock(&stp->st_mutex); + status = nfsd4_lock_ol_stateid(stp); + if (status != nfs_ok) + return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); From f35ab8e2eeb8bc9492cbaf4653e04eea9cb63648 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:22:48 +0300 Subject: [PATCH 259/705] grace: replace BUG_ON by WARN_ONCE in exit_net hook [ Upstream commit b872285751c1af010e12d02bce7069e2061a58ca ] Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs_common/grace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index fd8c9a5bcac4..e280e9c9ebf3 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -104,7 +104,9 @@ grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); - BUG_ON(!list_empty(grace_list)); + WARN_ONCE(!list_empty(grace_list), + "net %x %s: grace_list is not empty\n", + net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { From f25e222ccc50b0b52da98706c1e7da4ec090c4d7 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 9 Nov 2017 13:41:10 -0500 Subject: [PATCH 260/705] nfsd: check for use of the closed special stateid [ Upstream commit ae254dac721d44c0bfebe2795df87459e2e88219 ] Prevent the use of the closed (invalid) special stateid by clients. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 799c18cba6a5..c548190ef5ef 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -72,6 +72,7 @@ static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -4869,7 +4870,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return status; /* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4927,7 +4929,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, else if (typemask & NFS4_DELEG_STID) typemask |= NFS4_REVOKED_DELEG_STID; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { From 3b7742374f3e4d2a9891b02b13c69796d7bda090 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 13 Nov 2017 07:25:40 +0300 Subject: [PATCH 261/705] lockd: fix "list_add double add" caused by legacy signal interface [ Upstream commit 81833de1a46edce9ca20cfe079872ac1c20ef359 ] restart_grace() uses hardcoded init_net. It can cause to "list_add double add" in following scenario: 1) nfsd and lockd was started in several net namespaces 2) nfsd in init_net was stopped (lockd was not stopped because it have users from another net namespaces) 3) lockd got signal, called restart_grace() -> set_grace_period() and enabled lock_manager in hardcoded init_net. 4) nfsd in init_net is started again, its lockd_up() calls set_grace_period() and tries to add lock_manager into init_net 2nd time. Jeff Layton suggest: "Make it safe to call locks_start_grace multiple times on the same lock_manager. If it's already on the global grace_list, then don't try to add it again. (But we don't intentionally add twice, so for now we WARN about that case.) With this change, we also need to ensure that the nfsd4 lock manager initializes the list before we call locks_start_grace. While we're at it, move the rest of the nfsd_net initialization into nfs4_state_create_net. I see no reason to have it spread over two functions like it is today." Suggested patch was updated to generate warning in described situation. Suggested-by: Jeff Layton Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs_common/grace.c | 6 +++++- fs/nfsd/nfs4state.c | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index e280e9c9ebf3..77d136ac8909 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm) struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); - list_add(&lm->list, grace_list); + if (list_empty(&lm->list)) + list_add(&lm->list, grace_list); + else + WARN(1, "double list_add attempt detected in net %x %s\n", + net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c548190ef5ef..f463c4e0b2ea 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7012,6 +7012,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; + nn->boot_time = get_seconds(); + nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; + INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7069,9 +7073,6 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nn->boot_time = get_seconds(); - nn->grace_ended = false; - nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", From d5a746cf47975a30a8ffaa799db93881512530e8 Mon Sep 17 00:00:00 2001 From: Robert Lippert Date: Mon, 27 Nov 2017 15:51:55 -0800 Subject: [PATCH 262/705] hwmon: (pmbus) Use 64bit math for DIRECT format values [ Upstream commit bd467e4eababe4c04272c1e646f066db02734c79 ] Power values in the 100s of watt range can easily blow past 32bit math limits when processing everything in microwatts. Use 64bit math instead to avoid these issues on common 32bit ARM BMC platforms. Fixes: 442aba78728e ("hwmon: PMBus device driver") Signed-off-by: Robert Lippert Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/pmbus_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index ba59eaef2e07..d013acf3f83a 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -476,8 +477,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, static long pmbus_reg2data_direct(struct pmbus_data *data, struct pmbus_sensor *sensor) { - long val = (s16) sensor->data; - long m, b, R; + s64 b, val = (s16)sensor->data; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -505,11 +506,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val = div_s64(val + 5LL, 10L); /* round closest */ R++; } - return (val - b) / m; + val = div_s64(val - b, m); + return clamp_val(val, LONG_MIN, LONG_MAX); } /* @@ -629,7 +631,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, static u16 pmbus_data2reg_direct(struct pmbus_data *data, struct pmbus_sensor *sensor, long val) { - long m, b, R; + s64 b, val64 = val; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -646,18 +649,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } - val = val * m + b; + val64 = val64 * m + b; while (R > 0) { - val *= 10; + val64 *= 10; R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val64 = div_s64(val64 + 5LL, 10L); /* round closest */ R++; } - return val; + return (u16)clamp_val(val64, S16_MIN, S16_MAX); } static u16 pmbus_data2reg_vid(struct pmbus_data *data, From 98ae1ca7534e47508719ee54657f01df55437f62 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Nov 2017 20:46:49 +0100 Subject: [PATCH 263/705] bnxt_en: Fix an error handling path in 'bnxt_get_module_eeprom()' [ Upstream commit dea521a2b9f96e905fa2bb2f95e23ec00c2ec436 ] Error code returned by 'bnxt_read_sfp_module_eeprom_info()' is handled a few lines above when reading the A0 portion of the EEPROM. The same should be done when reading the A2 portion of the EEPROM. In order to correctly propagate an error, update 'rc' in this 2nd call as well, otherwise 0 (success) is returned. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a7e04ff4eaed..cde4b96f3153 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1843,8 +1843,8 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start, - length, data); + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + start, length, data); } return rc; } From 54a1fdff1b09465d4f5f06bff53fe2b1e0434673 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Nov 2017 17:13:40 +0100 Subject: [PATCH 264/705] xfs: fortify xfs_alloc_buftarg error handling [ Upstream commit d210a9874b8f6166579408131cb74495caff1958 ] percpu_counter_init failure path doesn't clean up &btp->bt_lru list. Call list_lru_destroy in that error path. Similarly register_shrinker error path is not handled. While it is unlikely to trigger these error path, it is not impossible especially the later might fail with large NUMAs. Let's handle the failure to make the code more robust. Noticed-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eca7baecc9f0..3f45d9867e10 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1785,22 +1785,27 @@ xfs_alloc_buftarg( btp->bt_bdi = blk_get_backing_dev_info(bdev); if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } From e11616d5e6c3209196c697e773cb41362419adf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Nov 2017 11:39:30 +0100 Subject: [PATCH 265/705] drm/amdgpu: don't try to move pinned BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6edc6910ba4cd6eab309263539c8f09b8ad772bf ] Never try to move pinned BOs during CS. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f26d1fd53bef..cb505f66d3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -416,6 +416,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate == lobj) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ From d47907bcac94cc455a62d50e6c71ae5341cee0ea Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Nov 2017 11:01:09 +0100 Subject: [PATCH 266/705] net: ethernet: xilinx: Mark XILINX_LL_TEMAC broken on 64-bit [ Upstream commit 15bfe05c8d6386f1a90e9340d15336e85e32aad6 ] On 64-bit (e.g. powerpc64/allmodconfig): drivers/net/ethernet/xilinx/ll_temac_main.c: In function 'temac_start_xmit_done': drivers/net/ethernet/xilinx/ll_temac_main.c:633:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); ^ cdmac_bd.app4 is u32, so it is too small to hold a kernel pointer. Note that several other fields in struct cdmac_bd are also too small to hold physical addresses on 64-bit platforms. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 6d68c8a8f4f2..da4ec575ccf9 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -34,6 +34,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" depends on (PPC || MICROBLAZE) + depends on !64BIT || BROKEN select PHYLIB ---help--- This driver supports the Xilinx 10/100/1000 LocalLink TEMAC From c4ecc2f696430bbd6c4b8bf3f114ad9a86e2c9b1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 Nov 2017 22:34:50 +0900 Subject: [PATCH 267/705] quota: Check for register_shrinker() failure. [ Upstream commit 88bc0ede8d35edc969350852894dc864a2dc1859 ] register_shrinker() might return -ENOMEM error since Linux 3.12. Call panic() as with other failure checks in this function if register_shrinker() failed. Fixes: 1d3d4437eae1 ("vmscan: per-node deferred work") Signed-off-by: Tetsuo Handa Cc: Jan Kara Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1bfac28b7e7d..f9246ac4eef8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2985,7 +2985,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } From d2a67f7afcad12bc958f947118e94d5af05aa218 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Nov 2017 12:00:24 -0500 Subject: [PATCH 268/705] SUNRPC: Allow connect to return EHOSTUNREACH [ Upstream commit 4ba161a793d5f43757c35feff258d9f20a082940 ] Reported-by: Dmitry Vyukov Signed-off-by: Trond Myklebust Tested-by: Dmitry Vyukov Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e01c825bc683..d24d14ea8ba4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2381,6 +2381,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* retry with existing socket, after a delay */ From 7b8623841f2b3845eaa5292f0b15e7f9dd0c5f42 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 29 Nov 2017 16:11:08 -0800 Subject: [PATCH 269/705] kmemleak: add scheduling point to kmemleak_scan() [ Upstream commit bde5f6bc68db51128f875a756e9082a6c6ff7b4c ] kmemleak_scan() will scan struct page for each node and it can be really large and resulting in a soft lockup. We have seen a soft lockup when do scan while compile kernel: watchdog: BUG: soft lockup - CPU#53 stuck for 22s! [bash:10287] [...] Call Trace: kmemleak_scan+0x21a/0x4c0 kmemleak_write+0x312/0x350 full_proxy_write+0x5a/0xa0 __vfs_write+0x33/0x150 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x61/0x1a0 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by adding cond_resched every MAX_SCAN_SIZE. Link: http://lkml.kernel.org/r/1511439788-20099-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d1380ed93fdf..20cf3be9a5e8 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1442,6 +1442,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); From 5f6a0441ca0d5dbf64ee56743028b8a9ca932e48 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:19 +0300 Subject: [PATCH 270/705] drm/bridge: tc358767: do no fail on hi-res displays [ Upstream commit cffd2b16c01c3431a7a7dd62e722af33490fc436 ] Do not fail data rates higher than 2.7 and more than 2 lanes. Try to fall back to 2.7Gbps and 2 lanes. Acked-by: Philipp Zabel Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-2-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 44d476ea6d2e..14b4e58f727f 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) From 8d4bfe89aacf5dbe66fa6dd26587c7d11a9f6a7a Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:20 +0300 Subject: [PATCH 271/705] drm/bridge: tc358767: filter out too high modes [ Upstream commit 99fc8e963a4c0203dba26a77cf737db6081bca14 ] Pixel clock limitation for DPI is 154 MHz. Do not accept modes with higher pixel clock rate. Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-3-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 14b4e58f727f..97087f962363 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1109,7 +1109,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: upto 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } From c55908604eccc659840e52a2c17614cfe90178bb Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:21 +0300 Subject: [PATCH 272/705] drm/bridge: tc358767: fix DP0_MISC register set [ Upstream commit f3b8adbe1911f66fd3cab1aaa74f0f66b7ceda25 ] Remove shift from TU_SIZE_RECOMMENDED define as it used to calculate max_tu_symbols. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-4-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 97087f962363..4d348800222e 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -716,7 +716,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) * Must be less than tu_size. */ max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: From 1bdfc52c331a149e393fb199f7e361fcb8e3c9ad Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:22 +0300 Subject: [PATCH 273/705] drm/bridge: tc358767: fix timing calculations [ Upstream commit 66d1c3b94d5d59e4325e61a78d520f92c043d645 ] Fields in HTIM01 and HTIM02 regs should be even. Recomended thresh_dly value is max_tu_symbol. Remove set of VPCTRL0.VSDELAY as it is related to DSI input interface. Currently driver supports only DPI. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-5-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 4d348800222e..821b93f68eef 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -659,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -668,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * datasheet is not clear of vsdelay in case of DPI + * assume we do not need any delay when DPI is a source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -693,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -709,13 +722,6 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. - */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | BPC_8); From 8ae615fecee51e7ec7ee62898b2d7072764e18d6 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:23 +0300 Subject: [PATCH 274/705] drm/bridge: tc358767: fix AUXDATAn registers access [ Upstream commit 9217c1abbc145a77d65c476cf2004a3df02104c7 ] First four bytes should go to DP0_AUXWDATA0. Due to bug if len > 4 first four bytes was writen to DP0_AUXWDATA1 and all data get shifted by 4 bytes. Fix it. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-6-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 821b93f68eef..6d0772864935 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } From f7170eb80aff7daf2221c94a92ba88dae5f971be Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:24 +0300 Subject: [PATCH 275/705] drm/bridge: tc358767: fix 1-lane behavior [ Upstream commit 4dbd6c03fbf88299c573d676838896c6e06aade2 ] Use drm_dp_channel_eq_ok helper Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-7-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 6d0772864935..f64f35cdc2ff 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -819,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -965,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -985,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } From 9301165c46234b06f482589dddd765e33d1136f4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2017 08:01:03 +0200 Subject: [PATCH 276/705] drm/omap: Fix error handling path in 'omap_dmm_probe()' [ Upstream commit 8677b1ac2db021ab30bb1fa34f1e56ebe0051ec3 ] If we don't find a matching device node, we must free the memory allocated in 'omap_dmm' a few lines above. Fixes: 7cb0d6c17b96 ("drm/omap: fix TILER on OMAP5") Signed-off-by: Christophe JAILLET Signed-off-by: Tomi Valkeinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 4ceed7a9762f..4b83e9eeab06 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; From d96024440ee5c6126bd080e84e6e3ae5a89a39c5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:17 -0800 Subject: [PATCH 277/705] xfs: ubsan fixes [ Upstream commit 22a6c83777ac7c17d6c63891beeeac24cf5da450 ] Fix some complaints from the UBSAN about signed integer addition overflows. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index d31cd1ebd8e9..f3acecf3869d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -391,7 +391,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1295,7 +1295,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1347,7 +1347,7 @@ __xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); From fde77c712ac003f023ea6ba3da30a0284c71ad92 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 28 Nov 2017 08:54:10 -0800 Subject: [PATCH 278/705] xfs: Properly retry failed dquot items in case of error during buffer writeback [ Upstream commit 373b0589dc8d58bc09c9a28d03611ae4fb216057 ] Once the inode item writeback errors is already fixed, it's time to fix the same problem in dquot code. Although there were no reports of users hitting this bug in dquot code (at least none I've seen), the bug is there and I was already planning to fix it when the correct approach to fix the inodes part was decided. This patch aims to fix the same problem in dquot code, regarding failed buffers being unable to be resubmitted once they are flush locked. Tested with the recently test-case sent to fstests list by Hou Tao. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dquot.c | 14 +++++++++++--- fs/xfs/xfs_dquot_item.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9d06cc30e875..7a7b3ccf2273 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1004,14 +1004,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* From fa64914313c1437c7ba843581a0b09bc2870bec3 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 17 Nov 2017 19:14:55 -0200 Subject: [PATCH 279/705] scsi: aacraid: Prevent crash in case of free interrupt during scsi EH path [ Upstream commit e4717292ddebcfe231651b5aff9fa19ca158d178 ] As part of the scsi EH path, aacraid performs a reinitialization of the adapter, which encompass freeing resources and IRQs, NULLifying lots of pointers, and then initialize it all over again. We've identified a problem during the free IRQ portion of this path if CONFIG_DEBUG_SHIRQ is enabled on kernel config file. Happens that, in case this flag was set, right after free_irq() effectively clears the interrupt, it checks if it was requested as IRQF_SHARED. In positive case, it performs another call to the IRQ handler on driver. Problem is: since aacraid currently free some resources *before* freeing the IRQ, once free_irq() path calls the handler again (due to CONFIG_DEBUG_SHIRQ), aacraid crashes due to NULL pointer dereference with the following trace: aac_src_intr_message+0xf8/0x740 [aacraid] __free_irq+0x33c/0x4a0 free_irq+0x78/0xb0 aac_free_irq+0x13c/0x150 [aacraid] aac_reset_adapter+0x2e8/0x970 [aacraid] aac_eh_reset+0x3a8/0x5d0 [aacraid] scsi_try_host_reset+0x74/0x180 scsi_eh_ready_devs+0xc70/0x1510 scsi_error_handler+0x624/0xa20 This patch prevents the crash by changing the order of the deinitialization in this path of aacraid: first we clear the IRQ, then we free other resources. No functional change intended. Signed-off-by: Guilherme G. Piccoli Reviewed-by: Raghava Aditya Renukunta Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 0aeecec1f5ea..e2962f15c189 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1416,13 +1416,13 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) * will ensure that i/o is queisced and the card is flushed in that * case. */ + aac_free_irq(aac); aac_fib_map_free(aac); pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys); aac->comm_addr = NULL; aac->comm_phys = 0; kfree(aac->queues); aac->queues = NULL; - aac_free_irq(aac); kfree(aac->fsa_dev); aac->fsa_dev = NULL; quirks = aac_get_driver_ident(index)->quirks; From a248dc6a55b782ec8a86e3a7ee2750e60f428c45 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 20 Nov 2017 08:12:29 -0600 Subject: [PATCH 280/705] scsi: ufs: ufshcd: fix potential NULL pointer dereference in ufshcd_config_vreg [ Upstream commit 727535903bea924c4f73abb202c4b3e85fff0ca4 ] _vreg_ is being dereferenced before it is null checked, hence there is a potential null pointer dereference. Fix this by moving the pointer dereference after _vreg_ has been null checked. This issue was detected with the help of Coccinelle. Fixes: aa4976130934 ("ufs: Add regulator enable support") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 530034bc2d13..2e9341233f66 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5327,12 +5327,15 @@ static int ufshcd_config_vreg(struct device *dev, struct ufs_vreg *vreg, bool on) { int ret = 0; - struct regulator *reg = vreg->reg; - const char *name = vreg->name; + struct regulator *reg; + const char *name; int min_uV, uA_load; BUG_ON(!vreg); + reg = vreg->reg; + name = vreg->name; + if (regulator_count_voltages(reg) > 0) { min_uV = on ? vreg->min_uV : 0; ret = regulator_set_voltage(reg, min_uV, vreg->max_uV); From 9adb2a0f9a470b59ccca26e07ed279c11200f0db Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 15 Nov 2017 14:12:30 +0200 Subject: [PATCH 281/705] iwlwifi: mvm: fix the TX queue hang timeout for MONITOR vif type [ Upstream commit d1b275ffec459c5ae12b5c7086c84175696e5a9f ] The MONITOR type is missing in the interface type switch. Add it. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index d04babd99b53..ff5ce1ed03c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1040,6 +1040,8 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, return le32_to_cpu(txq_timer->p2p_go); case NL80211_IFTYPE_P2P_DEVICE: return le32_to_cpu(txq_timer->p2p_device); + case NL80211_IFTYPE_MONITOR: + return default_timeout; default: WARN_ON(1); return mvm->cfg->base_params->wd_timeout; From c16c193e3abcf6ce98ac6d1e87c796070fc4fbb7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Nov 2017 11:10:29 -0800 Subject: [PATCH 282/705] ARM: dts: NSP: Fix PPI interrupt types [ Upstream commit 5f1aa51c7a1eef1c5a60b8334e32c89904964245 ] Booting a kernel results in the kernel warning us about the following PPI interrupts configuration: [ 0.105127] smp: Bringing up secondary CPUs ... [ 0.110545] GIC: PPI11 is secure or misconfigured [ 0.110551] GIC: PPI13 is secure or misconfigured Fix this by using the appropriate edge configuration for PPI11 and PPI13, this is similar to what was fixed for Northstar (BCM5301X) in commit 0e34079cd1f6 ("ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags"). Fixes: 7b2e987de207 ("ARM: NSP: add minimal Northstar Plus device tree") Fixes: 1a9d53cabaf4 ("ARM: dts: NSP: Add TWD Support to DT") Acked-by: Jon Mason Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-nsp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 7c9e0fae9bb9..65e0db1d3bd7 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -85,7 +85,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; @@ -93,7 +93,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; From b4bfc8ef594a459a23034682a464b6212408f3f0 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 16 Apr 2017 02:51:16 -0400 Subject: [PATCH 283/705] media: usbtv: add a new usbid [ Upstream commit 04226916d2360f56d57ad00bc48d2d1854d1e0b0 ] A new usbid of UTV007 is found in a newly bought device. The usbid is 1f71:3301. The ID on the chip is: UTV007 A89029.1 1520L18K1 Both video and audio is tested with the modified usbtv driver. Signed-off-by: Icenowy Zheng Acked-by: Lubomir Rintel Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbtv/usbtv-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c index dc76fd41e00f..0324633ede42 100644 --- a/drivers/media/usb/usbtv/usbtv-core.c +++ b/drivers/media/usb/usbtv/usbtv-core.c @@ -141,6 +141,7 @@ static void usbtv_disconnect(struct usb_interface *intf) static struct usb_device_id usbtv_id_table[] = { { USB_DEVICE(0x1b71, 0x3002) }, + { USB_DEVICE(0x1f71, 0x3301) }, {} }; MODULE_DEVICE_TABLE(usb, usbtv_id_table); From 0e216b0a0f74b06da20c03576daf6e75a77209f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 16:18:28 +0000 Subject: [PATCH 284/705] usb: gadget: don't dereference g until after it has been null checked [ Upstream commit b2fc059fa549fe6881d4c1f8d698b0f50bcd16ec ] Avoid dereferencing pointer g until after g has been sanity null checked; move the assignment of cdev much later when it is required into a more local scope. Detected by CoverityScan, CID#1222135 ("Dereference before null check") Fixes: b785ea7ce662 ("usb: gadget: composite: fix ep->maxburst initialization") Signed-off-by: Colin Ian King Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 325bf21ba13b..406758ed0b23 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -150,7 +150,6 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep) { - struct usb_composite_dev *cdev = get_gadget_data(g); struct usb_endpoint_descriptor *chosen_desc = NULL; struct usb_descriptor_header **speed_desc = NULL; @@ -229,8 +228,12 @@ ep_found: _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst != 0) + if (comp_desc->bMaxBurst != 0) { + struct usb_composite_dev *cdev; + + cdev = get_gadget_data(g); ERROR(cdev, "ep0 bMaxBurst must be 0\n"); + } _ep->maxburst = 1; break; } From ace1911b7620af13e83621049e3df9a53d38fd4e Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 25 Nov 2017 13:32:38 -0600 Subject: [PATCH 285/705] staging: rtl8188eu: Fix incorrect response to SIOCGIWESSID [ Upstream commit b77992d2df9e47144354d1b25328b180afa33442 ] When not associated with an AP, wifi device drivers should respond to the SIOCGIWESSID ioctl with a zero-length string for the SSID, which is the behavior expected by dhcpcd. Currently, this driver returns an error code (-1) from the ioctl call, which causes dhcpcd to assume that the device is not a wireless interface and therefore it fails to work correctly with it thereafter. This problem was reported and tested at https://github.com/lwfinger/rtl8188eu/issues/234. Signed-off-by: Larry Finger Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 4de9dbc93380..c7bf8ab26192 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1397,19 +1397,13 @@ static int rtw_wx_get_essid(struct net_device *dev, if ((check_fwstate(pmlmepriv, _FW_LINKED)) || (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) { len = pcur_bss->Ssid.SsidLength; - - wrqu->essid.length = len; - memcpy(extra, pcur_bss->Ssid.Ssid, len); - - wrqu->essid.flags = 1; } else { - ret = -1; - goto exit; + len = 0; + *extra = 0; } - -exit: - + wrqu->essid.length = len; + wrqu->essid.flags = 1; return ret; } From f94b238fb856432ae84b18ddde23518b5e5e358b Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 29 Dec 2017 17:05:43 +0100 Subject: [PATCH 286/705] drm/vc4: Move IRQ enable to PM path [ Upstream commit ce9caf2f79a5aa170a4b6456a03db639eed9c988 ] We were calling enable_irq on bind, where it was already enabled previously by the IRQ helper. Additionally, dev->irq is not set correctly until after postinstall and so was always zero here, triggering a warning in 4.15. Fix both by moving the enable to the power management resume path, where we know there was a previous disable invocation during suspend. Fixes: 253696ccd613 ("drm/vc4: Account for interrupts in flight") Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1514563543-32511-1-git-send-email-stschake@gmail.com Tested-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_irq.c | 3 --- drivers/gpu/drm/vc4/vc4_v3d.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index d45a7c0a7915..d96c084d3a76 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,9 +208,6 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - /* Undo the effects of a previous vc4_irq_uninstall. */ - enable_irq(dev->irq); - /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..ce7c21d250cf 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -173,6 +173,9 @@ static int vc4_v3d_runtime_resume(struct device *dev) struct vc4_dev *vc4 = v3d->vc4; vc4_v3d_init_hw(vc4->dev); + + /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */ + enable_irq(vc4->dev->irq); vc4_irq_postinstall(vc4->dev); return 0; From 383e0620b70bc654619c12faf3fd3b2384a42a4c Mon Sep 17 00:00:00 2001 From: Dmitry Eremin Date: Thu, 25 Jan 2018 16:51:04 +0300 Subject: [PATCH 287/705] staging: lustre: separate a connection destroy from free struct kib_conn commit 9b046013e5837f8a58453d1e9f8e01d03adb7fe7 upstream. The logic of the original commit 4d99b2581eff ("staging: lustre: avoid intensive reconnecting for ko2iblnd") was assumed conditional free of struct kib_conn if the second argument free_conn in function kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) is true. But this hunk of code was dropped from original commit. As result the logic works wrong and current code use struct kib_conn after free. > drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c > 3317 kiblnd_destroy_conn(conn, !peer); > ^^^^ Freed always (but should be conditionally) > 3318 > 3319 spin_lock_irqsave(lock, flags); > 3320 if (!peer) > 3321 continue; > 3322 > 3323 conn->ibc_peer = peer; > ^^^^^^^^^^^^^^ Use after free > 3324 if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) > 3325 list_add_tail(&conn->ibc_list, > ^^^^^^^^^^^^^^ Use after free > 3326 &kiblnd_data.kib_reconn_list); > 3327 else > 3328 list_add_tail(&conn->ibc_list, > ^^^^^^^^^^^^^^ Use after free > 3329 &kiblnd_data.kib_reconn_wait); To avoid confusion this fix moved the freeing a struct kib_conn outside of the function kiblnd_destroy_conn() and free as it was intended in original commit. Fixes: 4d99b2581eff ("staging: lustre: avoid intensive reconnecting for ko2iblnd") Signed-off-by: Dmitry Eremin Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 7 +++---- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 2 +- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 9e8802181452..e8d9db4d8179 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -824,14 +824,15 @@ struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cm return conn; failed_2: - kiblnd_destroy_conn(conn, true); + kiblnd_destroy_conn(conn); + LIBCFS_FREE(conn, sizeof(*conn)); failed_1: LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); failed_0: return NULL; } -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) +void kiblnd_destroy_conn(struct kib_conn *conn) { struct rdma_cm_id *cmid = conn->ibc_cmid; struct kib_peer *peer = conn->ibc_peer; @@ -894,8 +895,6 @@ void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); } - - LIBCFS_FREE(conn, sizeof(*conn)); } int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index 14576977200f..30cb2f5b3c15 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -1018,7 +1018,7 @@ int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, int state, int version); -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn); +void kiblnd_destroy_conn(struct kib_conn *conn); void kiblnd_close_conn(struct kib_conn *conn, int error); void kiblnd_close_conn_locked(struct kib_conn *conn, int error); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 995f2dac7f26..ea9a0c21d29d 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3323,11 +3323,13 @@ kiblnd_connd(void *arg) spin_unlock_irqrestore(lock, flags); dropped_lock = 1; - kiblnd_destroy_conn(conn, !peer); + kiblnd_destroy_conn(conn); spin_lock_irqsave(lock, flags); - if (!peer) + if (!peer) { + kfree(conn); continue; + } conn->ibc_peer = peer; if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) From 55eaecffe3d663d02084023b9fc06d5f39b97389 Mon Sep 17 00:00:00 2001 From: Gaurav Kohli Date: Tue, 23 Jan 2018 13:16:34 +0530 Subject: [PATCH 288/705] tty: fix data race between tty_init_dev and flush of buf commit b027e2298bd588d6fa36ed2eda97447fb3eac078 upstream. There can be a race, if receive_buf call comes before tty initialization completes in n_tty_open and tty->disc_data may be NULL. CPU0 CPU1 ---- ---- 000|n_tty_receive_buf_common() n_tty_open() -001|n_tty_receive_buf2() tty_ldisc_open.isra.3() -002|tty_ldisc_receive_buf(inline) tty_ldisc_setup() Using ldisc semaphore lock in tty_init_dev till disc_data initializes completely. Signed-off-by: Gaurav Kohli Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 8 +++++++- drivers/tty/tty_ldisc.c | 4 ++-- include/linux/tty.h | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 734a635e7363..8d9f9a803b42 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1543,6 +1543,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) "%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n", __func__, tty->driver->name); + retval = tty_ldisc_lock(tty, 5 * HZ); + if (retval) + goto err_release_lock; tty->port->itty = tty; /* @@ -1553,6 +1556,7 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) retval = tty_ldisc_setup(tty, tty->link); if (retval) goto err_release_tty; + tty_ldisc_unlock(tty); /* Return the tty locked so that it cannot vanish under the caller */ return tty; @@ -1565,9 +1569,11 @@ err_module_put: /* call the tty release_tty routine to clean out this slot */ err_release_tty: - tty_unlock(tty); + tty_ldisc_unlock(tty); tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n", retval, idx); +err_release_lock: + tty_unlock(tty); release_tty(tty, idx); return ERR_PTR(retval); } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index b0500a0a87b8..3a9e2a2fd4c6 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -336,7 +336,7 @@ static inline void __tty_ldisc_unlock(struct tty_struct *tty) ldsem_up_write(&tty->ldisc_sem); } -static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) +int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; @@ -347,7 +347,7 @@ static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) return 0; } -static void tty_ldisc_unlock(struct tty_struct *tty) +void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); __tty_ldisc_unlock(tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 40144f382516..a41244fe58d0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -394,6 +394,8 @@ extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); +extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); +extern void tty_ldisc_unlock(struct tty_struct *tty); #else static inline void console_init(void) { } From 2ef0d2ad5ce80410a1f770cf765d3ce6a9ba45e2 Mon Sep 17 00:00:00 2001 From: OKAMOTO Yoshiaki Date: Tue, 16 Jan 2018 09:51:17 +0000 Subject: [PATCH 289/705] usb: option: Add support for FS040U modem commit 69341bd15018da0a662847e210f9b2380c71e623 upstream. FS040U modem is manufactured by omega, and sold by Fujisoft. This patch adds ID of the modem to use option1 driver. Interface 3 is used as qmi_wwan, so the interface is ignored. Signed-off-by: Yoshiaki Okamoto Signed-off-by: Hiroyuki Yamamoto Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a818c43a02ec..1799aa058a5b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -383,6 +383,9 @@ static void option_instat_callback(struct urb *urb); #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 #define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 +/* Fujisoft products */ +#define FUJISOFT_PRODUCT_FS040U 0x9b02 + /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -1897,6 +1900,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), .driver_info = (kernel_ulong_t)&four_g_w100_blacklist }, + {USB_DEVICE(LONGCHEER_VENDOR_ID, FUJISOFT_PRODUCT_FS040U), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist}, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, From 068cc4ad2b233eaff1d60552353daee8c047f5e2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 Jan 2018 09:48:55 +0100 Subject: [PATCH 290/705] USB: serial: pl2303: new device id for Chilitag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d08dd3f3dd2ae351b793fc5b76abdbf0fd317b12 upstream. This adds a new device id for Chilitag devices to the pl2303 driver. Reported-by: "Chu.Mike [朱堅宜]" Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a51b28379850..3da25ad267a2 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -39,6 +39,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_CHILITAG) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 3b5a15d1dc0d..123289085ee2 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -17,6 +17,7 @@ #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 #define PL2303_PRODUCT_ID_RSAQ3 0xaaa2 +#define PL2303_PRODUCT_ID_CHILITAG 0xaaa8 #define PL2303_PRODUCT_ID_ALDIGA 0x0611 #define PL2303_PRODUCT_ID_MMX 0x0612 #define PL2303_PRODUCT_ID_GPRS 0x0609 From c3b1f3137751ca44d90a416f741794465641b522 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 14 Jan 2018 16:09:00 +0100 Subject: [PATCH 291/705] USB: cdc-acm: Do not log urb submission errors on disconnect commit f0386c083c2ce85284dc0b419d7b89c8e567c09f upstream. When disconnected sometimes the cdc-acm driver logs errors like these: [20278.039417] cdc_acm 2-2:2.1: urb 9 failed submission with -19 [20278.042924] cdc_acm 2-2:2.1: urb 10 failed submission with -19 [20278.046449] cdc_acm 2-2:2.1: urb 11 failed submission with -19 [20278.049920] cdc_acm 2-2:2.1: urb 12 failed submission with -19 [20278.053442] cdc_acm 2-2:2.1: urb 13 failed submission with -19 [20278.056915] cdc_acm 2-2:2.1: urb 14 failed submission with -19 [20278.060418] cdc_acm 2-2:2.1: urb 15 failed submission with -19 Silence these by not logging errors when the result is -ENODEV. Signed-off-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ea20b2cc189f..ec8fb55d85f8 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -375,7 +375,7 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) res = usb_submit_urb(acm->read_urbs[index], mem_flags); if (res) { - if (res != -EPERM) { + if (res != -EPERM && res != -ENODEV) { dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); From aa6a93fd0c382cda250876825c981ed270059e50 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 18 Jan 2018 12:13:45 +0100 Subject: [PATCH 292/705] CDC-ACM: apply quirk for card reader commit df1cc78a52491f71d8170d513d0f6f114faa1bda upstream. This devices drops random bytes from messages if you talk to it too fast. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ec8fb55d85f8..34d23cc99fbd 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1706,6 +1706,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, + { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ + .driver_info = SINGLE_RX_URB, + }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, From ec719c52af164f2eaff0e5bff3b6074ded70c9bf Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 13 Dec 2017 20:34:36 +0800 Subject: [PATCH 293/705] USB: serial: io_edgeport: fix possible sleep-in-atomic commit c7b8f77872c73f69a16528a9eb87afefcccdc18b upstream. According to drivers/usb/serial/io_edgeport.c, the driver may sleep under a spinlock. The function call path is: edge_bulk_in_callback (acquire the spinlock) process_rcvd_data process_rcvd_status change_port_settings send_iosp_ext_cmd write_cmd_usb usb_kill_urb --> may sleep To fix it, the redundant usb_kill_urb() is removed from the error path after usb_submit_urb() fails. This possible bug is found by my static analysis tool (DSAC) and checked by my code review. Signed-off-by: Jia-Ju Bai Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 464db17b5328..de61271f2ba3 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2215,7 +2215,6 @@ static int write_cmd_usb(struct edgeport_port *edge_port, /* something went wrong */ dev_err(dev, "%s - usb_submit_urb(write command) failed, status = %d\n", __func__, status); - usb_kill_urb(urb); usb_free_urb(urb); atomic_dec(&CmdUrbs); return status; From 4c6fcc3425e19d1f63fd5a7f4f4408ce8f688a48 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:07:30 -0700 Subject: [PATCH 294/705] usbip: prevent bind loops on devices attached to vhci_hcd commit ef54cf0c600fb8f5737fb001a9e357edda1a1de8 upstream. usbip host binds to devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Unbind followed by bind works, however device is left in a bad state with accesses via the attached busid result in errors and system hangs during shutdown. Fix it to check and bail out if the device is already attached to vhci_hcd. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_bind.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c index fa46141ae68b..e121cfb1746a 100644 --- a/tools/usb/usbip/src/usbip_bind.c +++ b/tools/usb/usbip/src/usbip_bind.c @@ -144,6 +144,7 @@ static int bind_device(char *busid) int rc; struct udev *udev; struct udev_device *dev; + const char *devpath; /* Check whether the device with this bus ID exists. */ udev = udev_new(); @@ -152,8 +153,16 @@ static int bind_device(char *busid) err("device with the specified bus ID does not exist"); return -1; } + devpath = udev_device_get_devpath(dev); udev_unref(udev); + /* If the device is already attached to vhci_hcd - bail out */ + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + err("bind loop detected: device: %s is attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + return -1; + } + rc = unbind_other(busid); if (rc == UNBIND_ST_FAILED) { err("could not unbind driver from device on busid %s", busid); From f80079536bb6b2c733776abb19c1f3e8c48923b6 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:08:03 -0700 Subject: [PATCH 295/705] usbip: list: don't list devices attached to vhci_hcd commit ef824501f50846589f02173d73ce3fe6021a9d2a upstream. usbip host lists devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Fix it to check and not list devices that are attached to vhci_hcd. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_list.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index f1b38e866dd7..d65a9f444174 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -187,6 +187,7 @@ static int list_devices(bool parsable) const char *busid; char product_name[128]; int ret = -1; + const char *devpath; /* Create libudev context. */ udev = udev_new(); @@ -209,6 +210,14 @@ static int list_devices(bool parsable) path = udev_list_entry_get_name(dev_list_entry); dev = udev_device_new_from_syspath(udev, path); + /* Ignore devices attached to vhci_hcd */ + devpath = udev_device_get_devpath(dev); + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + dbg("Skip the device %s already attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + continue; + } + /* Get device information. */ idVendor = udev_device_get_sysattr_value(dev, "idVendor"); idProduct = udev_device_get_sysattr_value(dev, "idProduct"); From 800de0fab17ac3e8656c86214d23299fedbf6ca4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 Jan 2018 14:46:41 +1100 Subject: [PATCH 296/705] USB: serial: simple: add Motorola Tetra driver commit 46fe895e22ab3845515ec06b01eaf1282b342e29 upstream. Add new Motorola Tetra (simple) driver for Motorola Solutions TETRA PEI devices. D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0cad ProdID=9011 Rev=24.16 S: Manufacturer=Motorola Solutions Inc. S: Product=Motorola Solutions TETRA PEI interface C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) Note that these devices do not support the CDC SET_CONTROL_LINE_STATE request (for any interface). Reported-by: Max Schulze Tested-by: Max Schulze Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/Kconfig | 1 + drivers/usb/serial/usb-serial-simple.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index 56ecb8b5115d..584ae8cbaf1c 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -63,6 +63,7 @@ config USB_SERIAL_SIMPLE - Google USB serial devices - HP4x calculators - a number of Motorola phones + - Motorola Tetra devices - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index e98b6e57b703..6aa7ff2c1cf7 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -80,6 +80,11 @@ DEVICE(vivopay, VIVOPAY_IDS); { USB_DEVICE(0x22b8, 0x2c64) } /* Motorola V950 phone */ DEVICE(moto_modem, MOTO_IDS); +/* Motorola Tetra driver */ +#define MOTOROLA_TETRA_IDS() \ + { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ +DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ @@ -110,6 +115,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &google_device, &vivopay_device, &moto_modem_device, + &motorola_tetra_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -125,6 +131,7 @@ static const struct usb_device_id id_table[] = { GOOGLE_IDS(), VIVOPAY_IDS(), MOTO_IDS(), + MOTOROLA_TETRA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), From f24d171a8100fb50a20fc6bb8c750fe9c452b9dc Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 9 Jan 2018 12:30:53 +0530 Subject: [PATCH 297/705] usb: f_fs: Prevent gadget unbind if it is already unbound commit ce5bf9a50daf2d9078b505aca1cea22e88ecb94a upstream. Upon usb composition switch there is possibility of ep0 file release happening after gadget driver bind. In case of composition switch from adb to a non-adb composition gadget will never gets bound again resulting into failure of usb device enumeration. Fix this issue by checking FFS_FL_BOUND flag and avoid extra gadget driver unbind if it is already done as part of composition switch. This fixes adb reconnection error reported on Android running v4.4 and above kernel versions. Verified on Hikey running vanilla v4.15-rc7 + few out of tree Mali patches. Reviewed-at: https://android-review.googlesource.com/#/c/582632/ Cc: Felipe Balbi Cc: Greg KH Cc: Michal Nazarewicz Cc: John Stultz Cc: Dmitry Shmidt Cc: Badhri Cc: Android Kernel Team Signed-off-by: Hemant Kumar [AmitP: Cherry-picked it from android-4.14 and updated the commit log] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7b107e43b1c4..d90bf57ba30e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3725,7 +3725,8 @@ static void ffs_closed(struct ffs_data *ffs) ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; ffs_dev_unlock(); - unregister_gadget_item(ci); + if (test_bit(FFS_FL_BOUND, &ffs->flags)) + unregister_gadget_item(ci); return; done: ffs_dev_unlock(); From 92e64a1079fac557ebb48ce266d23070924109ef Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 11 Jan 2018 13:10:16 +0100 Subject: [PATCH 298/705] usb: uas: unconditionally bring back host after reset commit cbeef22fd611c4f47c494b821b2b105b8af970bb upstream. Quoting Hans: If we return 1 from our post_reset handler, then our disconnect handler will be called immediately afterwards. Since pre_reset blocks all scsi requests our disconnect handler will then hang in the scsi_remove_host call. This is esp. bad because our disconnect handler hanging for ever also stops the USB subsys from enumerating any new USB devices, causes commands like lsusb to hang, etc. In practice this happens when unplugging some uas devices because the hub code may see the device as needing a warm-reset and calls usb_reset_device before seeing the disconnect. In this case uas_configure_endpoints fails with -ENODEV. We do not want to print an error for this, so this commit also silences the shost_printk for -ENODEV. ENDQUOTE However, if we do that we better drop any unconditional execution and report to the SCSI subsystem that we have undergone a reset but we are not operational now. Signed-off-by: Oliver Neukum Reported-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 9876af4ab64e..6891e9092775 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -1076,20 +1076,19 @@ static int uas_post_reset(struct usb_interface *intf) return 0; err = uas_configure_endpoints(devinfo); - if (err) { + if (err && err != ENODEV) shost_printk(KERN_ERR, shost, "%s: alloc streams error %d after reset", __func__, err); - return 1; - } + /* we must unblock the host in every case lest we deadlock */ spin_lock_irqsave(shost->host_lock, flags); scsi_report_bus_reset(shost, 0); spin_unlock_irqrestore(shost->host_lock, flags); scsi_unblock_requests(shost); - return 0; + return err ? 1 : 0; } static int uas_suspend(struct usb_interface *intf, pm_message_t message) From 57d4bb1beecb1b34237ddc8a08e819a0da4243b1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 17:50:02 +1100 Subject: [PATCH 299/705] usb/gadget: Fix "high bandwidth" check in usb_gadget_ep_match_desc() commit 11fb37998759c48e4e4c200c974593cbeab25d3e upstream. The current code tries to test for bits that are masked out by usb_endpoint_maxp(). Instead, use the proper accessor to access the new high bandwidth bits. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d685d82dcf48..e97539fc127e 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -913,7 +913,7 @@ int usb_gadget_ep_match_desc(struct usb_gadget *gadget, return 0; /* "high bandwidth" works only at high speed */ - if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11)) + if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp_mult(desc) > 1) return 0; switch (type) { From 9df847674ede722e12094def6520fd2ff98452df Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 26 Jan 2018 11:54:35 -0700 Subject: [PATCH 300/705] usbip: vhci_hcd: clear just the USB_PORT_STAT_POWER bit Upstream commit 1c9de5bf4286 ("usbip: vhci-hcd: Add USB3 SuperSpeed support") vhci_hcd clears all the bits port_status bits instead of clearing just the USB_PORT_STAT_POWER bit when it handles ClearPortFeature: USB_PORT_FEAT_POWER. This causes vhci_hcd attach to fail in a bad state, leaving device unusable by the client. The device is still attached and however client can't use it. The problem was fixed as part of larger change to add USB3 Super Speed support. This patch backports just the change to clear the USB_PORT_STAT_POWER. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 7f161b095176..dbe615ba07c9 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -300,7 +300,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " ClearPortFeature: USB_PORT_FEAT_POWER\n"); - dum->port_status[rhport] = 0; + dum->port_status[rhport] &= ~USB_PORT_STAT_POWER; dum->resuming = 0; break; case USB_PORT_FEAT_C_RESET: From 5846849a1ac75c4a4898f09f3835ac737bb888d5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jan 2018 15:58:34 -0200 Subject: [PATCH 301/705] serial: imx: Only wakeup via RTSDEN bit if the system has RTS/CTS commit 38b1f0fb42f772b8c9aac53593883a18ff5eb9d7 upstream. The wakeup mechanism via RTSDEN bit relies on the system using the RTS/CTS lines, so only allow such wakeup method when the system actually has RTS/CTS support. Fixes: bc85734b126f ("serial: imx: allow waking up on RTSD") Signed-off-by: Fabio Estevam Reviewed-by: Martin Kaiser Acked-by: Fugang Duan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a70356dad1b7..521a6e450755 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2239,12 +2239,14 @@ static void serial_imx_enable_wakeup(struct imx_port *sport, bool on) val &= ~UCR3_AWAKEN; writel(val, sport->port.membase + UCR3); - val = readl(sport->port.membase + UCR1); - if (on) - val |= UCR1_RTSDEN; - else - val &= ~UCR1_RTSDEN; - writel(val, sport->port.membase + UCR1); + if (sport->have_rtscts) { + val = readl(sport->port.membase + UCR1); + if (on) + val |= UCR1_RTSDEN; + else + val &= ~UCR1_RTSDEN; + writel(val, sport->port.membase + UCR1); + } } static int imx_serial_port_suspend_noirq(struct device *dev) From 1333c3e996eb799286ee2ef2c01752da45bf926f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 7 Jan 2018 15:05:49 +0100 Subject: [PATCH 302/705] spi: imx: do not access registers while clocks disabled commit d593574aff0ab846136190b1729c151c736727ec upstream. Since clocks are disabled except during message transfer clocks are also disabled when spi_imx_remove gets called. Accessing registers leads to a freeeze at least on a i.MX 6ULL. Enable clocks before disabling accessing the MXC_CSPICTRL register. Fixes: 9e556dcc55774 ("spi: spi-imx: only enable the clocks when we start to transfer a message") Signed-off-by: Stefan Agner Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-imx.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index deb782f6556c..a6e34f05d44d 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1307,12 +1307,23 @@ static int spi_imx_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct spi_imx_data *spi_imx = spi_master_get_devdata(master); + int ret; spi_bitbang_stop(&spi_imx->bitbang); + ret = clk_enable(spi_imx->clk_per); + if (ret) + return ret; + + ret = clk_enable(spi_imx->clk_ipg); + if (ret) { + clk_disable(spi_imx->clk_per); + return ret; + } + writel(0, spi_imx->base + MXC_CSPICTRL); - clk_unprepare(spi_imx->clk_ipg); - clk_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); + clk_disable_unprepare(spi_imx->clk_per); spi_imx_sdma_exit(spi_imx); spi_master_put(master); From 331b057d4f3ccf2290e6e651b5728db81e9249c6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 3 Feb 2018 17:05:43 +0100 Subject: [PATCH 303/705] Linux 4.9.80 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4a7e6dff1c2e..9550b6939076 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 79 +SUBLEVEL = 80 EXTRAVERSION = NAME = Roaring Lionus From be6641a7e6f79b69446e7f1c44bab75bf20f1665 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Jan 2018 03:52:05 +1100 Subject: [PATCH 304/705] powerpc/pseries: Add H_GET_CPU_CHARACTERISTICS flags & wrapper commit 191eccb1580939fb0d47deb405b82a85b0379070 upstream. A new hypervisor call has been defined to communicate various characteristics of the CPU to guests. Add definitions for the hcall number, flags and a wrapper function. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman [Balbir fixed conflicts in backport] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/hvcall.h | 17 +++++++++++++++++ arch/powerpc/include/asm/plpar_wrappers.h | 14 ++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf147..0e12cb2437d1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -240,6 +240,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -306,6 +307,17 @@ #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + #ifndef __ASSEMBLY__ /** @@ -433,6 +445,11 @@ static inline unsigned long cmo_get_page_size(void) } #endif /* CONFIG_PPC_PSERIES */ +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2..4e53b8570d1f 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ From 8fd3f98d0f4d5a9044b900de950ab02164968d27 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 305/705] powerpc/64: Add macros for annotating the destination of rfid/hrfid commit 50e51c13b3822d14ff6df4279423e4b7b2269bc3 upstream. The rfid/hrfid ((Hypervisor) Return From Interrupt) instruction is used for switching from the kernel to userspace, and from the hypervisor to the guest kernel. However it can and is also used for other transitions, eg. from real mode kernel code to virtual mode kernel code, and it's not always clear from the code what the destination context is. To make it clearer when reading the code, add macros which encode the expected destination context. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/exception-64e.h | 6 +++++ arch/powerpc/include/asm/exception-64s.h | 29 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b6..555e22d5e07f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9a3eee661297..6771cbe44594 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -51,6 +51,35 @@ #define EX_PPR 88 /* SMT thread status register (priority) */ #define EX_CTR 96 +/* Macros for annotating the expected destination of (h)rfid */ + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + rfid + +#define RFI_TO_USER_OR_KERNEL \ + rfid + +#define RFI_TO_GUEST \ + rfid + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + hrfid + +#define HRFI_TO_USER_OR_KERNEL \ + hrfid + +#define HRFI_TO_GUEST \ + hrfid + +#define HRFI_TO_UNKNOWN \ + hrfid + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ From 9d914324d966497f4d40cfa9333cbe55150cc09b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 306/705] powerpc/64: Convert fast_exception_return to use RFI_TO_USER/KERNEL commit a08f828cf47e6c605af21d2cdec68f84e799c318 upstream. Similar to the syscall return path, in fast_exception_return we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index caa659671599..d267d3bcde90 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -859,7 +859,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -872,8 +872,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ - rfid +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ From 00e40620a51ebee4ea002ec2efcd64f1960cb964 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 307/705] powerpc/64: Convert the syscall exit path to use RFI_TO_USER/KERNEL commit b8e90cb7bc04a509e821e82ab6ed7a8ef11ba333 upstream. In the syscall exit path we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d267d3bcde90..c33b69d10919 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -251,13 +251,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ syscall_error: From 48cc95d4e4d6a1265b7f728182d6dc62de849b05 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 308/705] powerpc/64s: Convert slb_miss_common to use RFI_TO_USER/KERNEL commit c7305645eb0c1621351cfc104038831ae87c0053 upstream. In the SLB miss handler we may be returning to user or kernel. We need to add a check early on and save the result in the cr4 register, and then we bifurcate the return path based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Nicholas Piggin [mpe: Backport to 4.4 based on patch from Balbir] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fd68e19b9ef7..fc72f81411c4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ beq- 2f + andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */ + bne 1f /* All done -- return from exception. */ @@ -671,7 +673,23 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_KERNEL + b . /* prevent speculative execution */ + +1: +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_USER b . /* prevent speculative execution */ 2: mfspr r11,SPRN_SRR0 @@ -679,7 +697,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: mfspr r11,SPRN_SRR0 From c3b82ebee6e0d92431c92ee80393c023d550c8a1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 309/705] powerpc/64s: Add support for RFI flush of L1-D cache commit aa8a5e0062ac940f7659394f4817c948dc8c0667 upstream. On some CPUs we can prevent the Meltdown vulnerability by flushing the L1-D cache on exit from kernel to user mode, and from hypervisor to guest. This is known to be the case on at least Power7, Power8 and Power9. At this time we do not know the status of the vulnerability on other CPUs such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale CPUs. As more information comes to light we can enable this, or other mechanisms on those CPUs. The vulnerability occurs when the load of an architecturally inaccessible memory region (eg. userspace load of kernel memory) is speculatively executed to the point where its result can influence the address of a subsequent speculatively executed load. In order for that to happen, the first load must hit in the L1, because before the load is sent to the L2 the permission check is performed. Therefore if no kernel addresses hit in the L1 the vulnerability can not occur. We can ensure that is the case by flushing the L1 whenever we return to userspace. Similarly for hypervisor vs guest. In order to flush the L1-D cache on exit, we add a section of nops at each (h)rfi location that returns to a lower privileged context, and patch that with some sequence. Newer firmwares are able to advertise to us that there is a special nop instruction that flushes the L1-D. If we do not see that advertised, we fall back to doing a displacement flush in software. For guest kernels we support migration between some CPU versions, and different CPUs may use different flush instructions. So that we are prepared to migrate to a machine with a different flush instruction activated, we may have to patch more than one flush instruction at boot if the hypervisor tells us to. In the end this patch is mostly the work of Nicholas Piggin and Michael Ellerman. However a cast of thousands contributed to analysis of the issue, earlier versions of the patch, back ports testing etc. Many thanks to all of them. Tested-by: Jon Masters Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman [Balbir - back ported to stable with changes] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/exception-64s.h | 40 ++++++++--- arch/powerpc/include/asm/feature-fixups.h | 15 ++++ arch/powerpc/include/asm/paca.h | 10 +++ arch/powerpc/include/asm/setup.h | 13 ++++ arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/kernel/exceptions-64s.S | 86 +++++++++++++++++++++++ arch/powerpc/kernel/setup_64.c | 79 +++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 9 +++ arch/powerpc/lib/feature-fixups.c | 42 +++++++++++ 9 files changed, 290 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 6771cbe44594..cab6d2a46c41 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -51,34 +51,58 @@ #define EX_PPR 88 /* SMT thread status register (priority) */ #define EX_CTR 96 -/* Macros for annotating the expected destination of (h)rfid */ +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. + */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop #define RFI_TO_KERNEL \ rfid #define RFI_TO_USER \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_GUEST \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define HRFI_TO_KERNEL \ hrfid #define HRFI_TO_USER \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_GUEST \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index ddf54f5bbdd1..7b332342071c 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -189,4 +189,19 @@ void apply_feature_fixups(void); void setup_feature_keys(void); #endif +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + +#ifndef __ASSEMBLY__ + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + +#endif + #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6a6792bb39fb..ea43897183fd 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -205,6 +205,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other paca data leaking into the L1d + */ + u64 exrfi[13] __aligned(0x80); + void *rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 654d64c9f3ac..6825a67cc3db 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c833d88c423d..64bcbd580495 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -240,6 +240,10 @@ int main(void) #ifdef CONFIG_PPC_BOOK3S_64 DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); + DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area)); + DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi)); + DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence)); + DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets)); #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fc72f81411c4..96db6c3adebe 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1594,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) bl kernel_bad_stack b 1b + .globl rfi_flush_fallback +rfi_flush_fallback: + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + + .globl hrfi_flush_fallback +hrfi_flush_fallback: + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Called from arch_local_irq_enable when an interrupt needs * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a12be60181bf..849d086288c6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -678,4 +678,83 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +bool rfi_flush; + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.dsize; + limit = min(safe_stack_limit(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + rfi_flush_enable(enable); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7394b770ae1f..b61fb7902018 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RODATA +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 043415f0bdb1..e86bfa111f3c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include #include #include +#include struct fixup_entry { unsigned long mask; @@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup), + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; From 0ef9f8289edf1d335fb2bd3c162521528823b585 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 310/705] powerpc/64s: Support disabling RFI flush with no_rfi_flush and nopti commit bc9c9304a45480797e13a8e1df96ffcf44fb62fe upstream. Because there may be some performance overhead of the RFI flush, add kernel command line options to disable it. We add a sensibly named 'no_rfi_flush' option, but we also hijack the x86 option 'nopti'. The RFI flush is not the same as KPTI, but if we see 'nopti' we can guess that the user is trying to avoid any overhead of Meltdown mitigations, and it means we don't have to educate every one about a different command line option. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 849d086288c6..e728bb437fbd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -682,8 +682,29 @@ early_initcall(disable_hardlockup_detector); #ifdef CONFIG_PPC_BOOK3S_64 static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; +static bool no_rfi_flush; bool rfi_flush; +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + static void do_nothing(void *unused) { /* @@ -754,7 +775,8 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - rfi_flush_enable(enable); + if (!no_rfi_flush) + rfi_flush_enable(enable); } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif From 7db0fff62f52c3f23c39262b9e037d8b43dfc88d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 311/705] powerpc/pseries: Query hypervisor for RFI flush settings commit 8989d56878a7735dfdb234707a2fee6faf631085 upstream. A new hypervisor call is available which tells the guest settings related to the RFI flush. Use it to query the appropriate flush instruction(s), and whether the flush is required. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/setup.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 97aa3f332f24..1845fc611912 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else { + /* Default to fallback if case hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); From 6aec12e1869e31839f317c02f81a92c393222f71 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 312/705] powerpc/powernv: Check device-tree for RFI flush settings commit 6e032b350cd1fdb830f18f8320ef0e13b4e24094 upstream. New device-tree properties are available which tell the hypervisor settings related to the RFI flush. Use them to determine the appropriate flush instruction to use, and whether the flush is required. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/setup.c | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index b33faa0015cc..6f8b4c19373a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,13 +35,63 @@ #include #include #include +#include +#include #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); From 1f0c936f431d98611fff5ef7082380f087da1578 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 21:20:05 +1100 Subject: [PATCH 313/705] powerpc/64s: Wire up cpu_show_meltdown() commit fd6e440f20b1a4304553775fc55938848ff617c9 upstream. The recent commit 87590ce6e373 ("sysfs/cpu: Add vulnerability folder") added a generic folder and set of files for reporting information on CPU vulnerabilities. One of those was for meltdown: /sys/devices/system/cpu/vulnerabilities/meltdown This commit wires up that file for 64-bit Book3S powerpc. For now we default to "Vulnerable" unless the RFI flush is enabled. That may not actually be true on all hardware, further patches will refine the reporting based on the CPU/platform etc. But for now we default to being pessimists. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/setup_64.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6eda5abbd719..0a6bb48854e3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,6 +128,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_TIME_VSYSCALL_OLD select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e728bb437fbd..79136dc981be 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -778,5 +778,13 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ #endif From 9fed3978c39b27b27eeb676301b7ba960a74170c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 22:17:18 +1100 Subject: [PATCH 314/705] powerpc/64s: Allow control of RFI flush via debugfs commit 236003e6b5443c45c18e613d2b0d776a9f87540e upstream. Expose the state of the RFI flush (enabled/disabled) via debugfs, and allow it to be enabled/disabled at runtime. eg: $ cat /sys/kernel/debug/powerpc/rfi_flush 1 $ echo 0 > /sys/kernel/debug/powerpc/rfi_flush $ cat /sys/kernel/debug/powerpc/rfi_flush 0 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 79136dc981be..7c30a91c1f86 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -779,6 +780,35 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) rfi_flush_enable(enable); } +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (rfi_flush) From 781a2d683110864fc136d85df7a2fb3fc145e5e8 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Wed, 10 Jan 2018 17:41:10 +0100 Subject: [PATCH 315/705] auxdisplay: img-ascii-lcd: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 09c479f7f1fbfaf848e5813996793966cd50be81 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/auxdisplay/img-ascii-lcd.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/img-ascii-lcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index 83f1439e57fd..6e8eaa7fe7a6 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -442,3 +442,7 @@ static struct platform_driver img_ascii_lcd_driver = { .remove = img_ascii_lcd_remove, }; module_platform_driver(img_ascii_lcd_driver); + +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); +MODULE_AUTHOR("Paul Burton "); +MODULE_LICENSE("GPL"); From 0ee4f5e7bbffc07d98cb7626446175700ef5fcce Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:58:03 -0800 Subject: [PATCH 316/705] pinctrl: pxa: pxa2xx: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 0b9335cbd38e3bd2025bcc23b5758df4ac035f75 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/pinctrl/pxa/pinctrl-pxa2xx.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pxa/pinctrl-pxa2xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c index 866aa3ce1ac9..6cf0006d4c8d 100644 --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); + +MODULE_AUTHOR("Robert Jarzmik "); +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); +MODULE_LICENSE("GPL v2"); From 374c84de94af798cad81b5e09b81e901966a4eb0 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Sun, 19 Nov 2017 23:45:49 -0800 Subject: [PATCH 317/705] ASoC: pcm512x: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 0cab20cec0b663b7be8e2be5998d5a4113647f86 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/codecs/snd-soc-pcm512x-spi.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/pcm512x-spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 712ed6598c48..ebdf9bd5a64c 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { }; module_spi_driver(pcm512x_spi_driver); + +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL v2"); From 0a61cd6caed72ecac8cbb637f6498e17a33d73b1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 29 Jan 2018 18:16:55 -0800 Subject: [PATCH 318/705] kaiser: fix intel_bts perf crashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI; Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7: honggfuzz -f /tmp/somedirectorywithatleastonefile \ --linux_perf_bts_edge -s -- /bin/true (honggfuzz from https://github.com/google/honggfuzz) crashed with BUG: unable to handle kernel paging request at ffff9d3215100000 (then narrowed it down to perf record --per-thread -e intel_bts//u -- /bin/ls). The intel_bts driver does not use the 'normal' BTS buffer which is exposed through kaiser_add_mapping(), but instead uses the memory allocated for the perf AUX buffer. This obviously comes apart when using PTI, because then the kernel mapping, which includes that AUX buffer memory, disappears while switched to user page tables. Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping() to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit 99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now. Slightly reorganized surrounding code in bts_buffer_setup_aux(), so it can better match bts_buffer_free_aux(): free_aux with an #ifdef to avoid the loop when PTI is off, but setup_aux needs to loop anyway (and kaiser_add_mapping() is cheap when PTI config is off or "pti=off"). Reported-by: Vince Weaver Reported-by: Robert Święcki Analyzed-by: Peter Zijlstra Analyzed-by: Stephane Eranian Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Vince Weaver Cc: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/bts.c | 44 +++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 982c9e31daca..21298c173b0e 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) return 1 << (PAGE_SHIFT + page_private(page)); } +static void bts_buffer_free_aux(void *data) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct bts_buffer *buf = data; + int nbuf; + + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { + struct page *page = buf->buf[nbuf].page; + void *kaddr = page_address(page); + size_t page_size = buf_size(page); + + kaiser_remove_mapping((unsigned long)kaddr, page_size); + } +#endif + kfree(data); +} + static void * bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) { @@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) buf->real_size = size - size % BTS_RECORD_SIZE; for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { - unsigned int __nr_pages; + void *kaddr = pages[pg]; + size_t page_size; + + page = virt_to_page(kaddr); + page_size = buf_size(page); + + if (kaiser_add_mapping((unsigned long)kaddr, + page_size, __PAGE_KERNEL) < 0) { + buf->nr_bufs = nbuf; + bts_buffer_free_aux(buf); + return NULL; + } - page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; buf->buf[nbuf].size -= pad; - pg += __nr_pages; - offset += __nr_pages << PAGE_SHIFT; + pg += page_size >> PAGE_SHIFT; + offset += page_size; } return buf; } -static void bts_buffer_free_aux(void *data) -{ - kfree(data); -} - static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) { return buf->buf[idx].offset + buf->buf[idx].displacement; From ae1fc8de51b10dba40cbd54959b5ba0a311c0861 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 29 Jan 2018 18:17:26 -0800 Subject: [PATCH 319/705] x86/pti: Make unpoison of pgd for trusted boot work for real commit 445b69e3b75e42362a5bdc13c8b8f61599e2228a upstream The inital fix for trusted boot and PTI potentially misses the pgd clearing if pud_alloc() sets a PGD. It probably works in *practice* because for two adjacent calls to map_tboot_page() that share a PGD entry, the first will clear NX, *then* allocate and set the PGD (without NX clear). The second call will *not* allocate but will clear the NX bit. Defer the NX clearing to a point after it is known that all top-level allocations have occurred. Add a comment to clarify why. [ tglx: Massaged changelog ] [ hughd notes: I have not tested tboot, but this looks to me as necessary and as safe in old-Kaiser backports as it is upstream; I'm not submitting the commit-to-be-fixed 262b6b30087, since it was undone by 445b69e3b75e, and makes conflict trouble because of 5-level's p4d versus 4-level's pgd.] Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Andrea Arcangeli Cc: Jon Masters Cc: Tim Chen Cc: gnomes@lxorguk.ukuu.org.uk Cc: peterz@infradead.org Cc: ning.sun@intel.com Cc: tboot-devel@lists.sourceforge.net Cc: andi@firstfloor.org Cc: luto@kernel.org Cc: law@redhat.com Cc: pbonzini@redhat.com Cc: torvalds@linux-foundation.org Cc: gregkh@linux-foundation.org Cc: dwmw@amazon.co.uk Cc: nickc@redhat.com Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com Cc: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tboot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 8402907825b0..21454e254a4c 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); pte_unmap(pte); + + /* + * PTI poisons low addresses in the kernel page tables in the + * name of making them unusable for userspace. To execute + * code at such a low address, the poison must be cleared. + * + * Note: 'pgd' actually gets set in pud_alloc(). + */ + pgd->pgd &= ~_PAGE_NX; + return 0; } From 400d3c8b0c7f6c942cd9bf201da1f32da9bdcc9f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 29 Jan 2018 18:17:58 -0800 Subject: [PATCH 320/705] kaiser: allocate pgd with order 0 when pti=off The 4.9.77 version of "x86/pti/efi: broken conversion from efi to kernel page table" looked nicer than the 4.4.112 version, but was suboptimal on machines booted with "pti=off" (or on AMD machines): it allocated pgd with an order 1 page whatever the setting of kaiser_enabled. Fix that by moving the definition of PGD_ALLOCATION_ORDER from asm/pgalloc.h to asm/pgtable.h, which already defines kaiser_enabled. Fixes: 1b92c48a2eeb ("x86/pti/efi: broken conversion from efi to kernel page table") Reviewed-by: Pavel Tatashin Cc: Steven Sistare Cc: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgalloc.h | 11 ----------- arch/x86/include/asm/pgtable.h | 6 ++++++ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 1178a51b77f3..b6d425999f99 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; -#ifdef CONFIG_PAGE_TABLE_ISOLATION -/* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. - */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif - /* * Allocate and free page tables. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2536f90cd30c..5af0401ccff2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -20,9 +20,15 @@ #ifdef CONFIG_PAGE_TABLE_ISOLATION extern int kaiser_enabled; +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ #else #define kaiser_enabled 0 #endif +#define PGD_ALLOCATION_ORDER kaiser_enabled void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_checkwx(void); From ffcf167d348ee5e8ddbe2c4116b71ebfff2258e0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 11 Jan 2018 18:57:26 +0100 Subject: [PATCH 321/705] serial: core: mark port as initialized after successful IRQ change commit 44117a1d1732c513875d5a163f10d9adbe866c08 upstream. setserial changes the IRQ via uart_set_info(). It invokes uart_shutdown() which free the current used IRQ and clear TTY_PORT_INITIALIZED. It will then update the IRQ number and invoke uart_startup() before returning to the caller leaving TTY_PORT_INITIALIZED cleared. The next open will crash with | list_add double add: new=ffffffff839fcc98, prev=ffffffff839fcc98, next=ffffffff839fcc98. since the close from the IOCTL won't free the IRQ (and clean the list) due to the TTY_PORT_INITIALIZED check in uart_shutdown(). There is same pattern in uart_do_autoconfig() and I *think* it also needs to set TTY_PORT_INITIALIZED there. Is there a reason why uart_startup() does not set the flag by itself after the IRQ has been acquired (since it is cleared in uart_shutdown)? Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f2303f390345..23973a8124fc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -965,6 +965,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, } } else { retval = uart_startup(tty, state, 1); + if (retval == 0) + tty_port_set_initialized(port, true); if (retval > 0) retval = 0; } From 7d3d60ef2256f5fcfd4929963dd67f58028ebeee Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 31 Jan 2018 16:29:30 +0200 Subject: [PATCH 322/705] ip6mr: fix stale iterator [ Upstream commit 4adfa79fc254efb7b0eb3cd58f62c2c3f805f1ba ] When we dump the ip6mr mfc entries via proc, we initialize an iterator with the table to dump but we don't clear the cache pointer which might be initialized from a prior read on the same descriptor that ended. This can result in lock imbalance (an unnecessary unlock) leading to other crashes and hangs. Clear the cache pointer like ipmr does to fix the issue. Thanks for the reliable reproducer. Here's syzbot's trace: WARNING: bad unlock balance detected! 4.15.0-rc3+ #128 Not tainted syzkaller971460/3195 is trying to release lock (mrt_lock) at: [<000000006898068d>] ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553 but there are no more locks to release! other info that might help us debug this: 1 lock held by syzkaller971460/3195: #0: (&p->lock){+.+.}, at: [<00000000744a6565>] seq_read+0xd5/0x13d0 fs/seq_file.c:165 stack backtrace: CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_unlock_imbalance_bug+0x12f/0x140 kernel/locking/lockdep.c:3561 __lock_release kernel/locking/lockdep.c:3775 [inline] lock_release+0x5f9/0xda0 kernel/locking/lockdep.c:4023 __raw_read_unlock include/linux/rwlock_api_smp.h:225 [inline] _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255 ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553 traverse+0x3bc/0xa00 fs/seq_file.c:135 seq_read+0x96a/0x13d0 fs/seq_file.c:189 proc_reg_read+0xef/0x170 fs/proc/inode.c:217 do_loop_readv_writev fs/read_write.c:673 [inline] do_iter_read+0x3db/0x5b0 fs/read_write.c:897 compat_readv+0x1bf/0x270 fs/read_write.c:1140 do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189 C_SYSC_preadv fs/read_write.c:1209 [inline] compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125 RIP: 0023:0xf7f73c79 RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0 RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 BUG: sleeping function called from invalid context at lib/usercopy.c:25 in_atomic(): 1, irqs_disabled(): 0, pid: 3195, name: syzkaller971460 INFO: lockdep is turned off. CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6060 __might_sleep+0x95/0x190 kernel/sched/core.c:6013 __might_fault+0xab/0x1d0 mm/memory.c:4525 _copy_to_user+0x2c/0xc0 lib/usercopy.c:25 copy_to_user include/linux/uaccess.h:155 [inline] seq_read+0xcb4/0x13d0 fs/seq_file.c:279 proc_reg_read+0xef/0x170 fs/proc/inode.c:217 do_loop_readv_writev fs/read_write.c:673 [inline] do_iter_read+0x3db/0x5b0 fs/read_write.c:897 compat_readv+0x1bf/0x270 fs/read_write.c:1140 do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189 C_SYSC_preadv fs/read_write.c:1209 [inline] compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125 RIP: 0023:0xf7f73c79 RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0 RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 WARNING: CPU: 1 PID: 3195 at lib/usercopy.c:26 _copy_to_user+0xb5/0xc0 lib/usercopy.c:26 Reported-by: syzbot Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 117405dd07a3..a30e7e925c9b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) return ERR_PTR(-ENOENT); it->mrt = mrt; + it->cache = NULL; return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } From ce43c07fcef8cf5cdbc7a45db7a6b4a1a7473905 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2018 10:26:57 -0800 Subject: [PATCH 323/705] net: igmp: add a missing rcu locking section [ Upstream commit e7aadb27a5415e8125834b84a74477bfbee4eff5 ] Newly added igmpv3_get_srcaddr() needs to be called under rcu lock. Timer callbacks do not ensure this locking. ============================= WARNING: suspicious RCU usage 4.15.0+ #200 Not tainted ----------------------------- ./include/linux/inetdevice.h:216 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by syzkaller616973/4074: #0: (&mm->mmap_sem){++++}, at: [<00000000bfce669e>] __do_page_fault+0x32d/0xc90 arch/x86/mm/fault.c:1355 #1: ((&im->timer)){+.-.}, at: [<00000000619d2f71>] lockdep_copy_map include/linux/lockdep.h:178 [inline] #1: ((&im->timer)){+.-.}, at: [<00000000619d2f71>] call_timer_fn+0x1c6/0x820 kernel/time/timer.c:1316 #2: (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] spin_lock_bh include/linux/spinlock.h:315 [inline] #2: (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] igmpv3_send_report+0x98/0x5b0 net/ipv4/igmp.c:600 stack backtrace: CPU: 0 PID: 4074 Comm: syzkaller616973 Not tainted 4.15.0+ #200 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4592 __in_dev_get_rcu include/linux/inetdevice.h:216 [inline] igmpv3_get_srcaddr net/ipv4/igmp.c:329 [inline] igmpv3_newpack+0xeef/0x12e0 net/ipv4/igmp.c:389 add_grhead.isra.27+0x235/0x300 net/ipv4/igmp.c:432 add_grec+0xbd3/0x1170 net/ipv4/igmp.c:565 igmpv3_send_report+0xd5/0x5b0 net/ipv4/igmp.c:605 igmp_send_report+0xc43/0x1050 net/ipv4/igmp.c:722 igmp_timer_expire+0x322/0x5c0 net/ipv4/igmp.c:831 call_timer_fn+0x228/0x820 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:541 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:938 Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9c7a4cea1628..7f5fe07d0b13 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; + + rcu_read_lock(); pip->saddr = igmpv3_get_srcaddr(dev, &fl4); + rcu_read_unlock(); + pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); From 97fe899816a68e3970b877e766bcb8d66013076b Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 29 Jan 2018 17:53:42 +0800 Subject: [PATCH 324/705] qlcnic: fix deadlock bug [ Upstream commit 233ac3891607f501f08879134d623b303838f478 ] The following soft lockup was caught. This is a deadlock caused by recusive locking. Process kworker/u40:1:28016 was holding spin lock "mbx->queue_lock" in qlcnic_83xx_mailbox_worker(), while a softirq came in and ask the same spin lock in qlcnic_83xx_enqueue_mbx_cmd(). This lock should be hold by disable bh.. [161846.962125] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/u40:1:28016] [161846.962367] Modules linked in: tun ocfs2 xen_netback xen_blkback xen_gntalloc xen_gntdev xen_evtchn xenfs xen_privcmd autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs bnx2fc fcoe libfcoe libfc sunrpc 8021q mrp garp bridge stp llc bonding dm_round_robin dm_multipath iTCO_wdt iTCO_vendor_support pcspkr sb_edac edac_core i2c_i801 shpchp lpc_ich mfd_core ioatdma ipmi_devintf ipmi_si ipmi_msghandler sg ext4 jbd2 mbcache2 sr_mod cdrom sd_mod igb i2c_algo_bit i2c_core ahci libahci megaraid_sas ixgbe dca ptp pps_core vxlan udp_tunnel ip6_udp_tunnel qla2xxx scsi_transport_fc qlcnic crc32c_intel be2iscsi bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi ipv6 cxgb3 mdio libiscsi_tcp qla4xxx iscsi_boot_sysfs libiscsi scsi_transport_iscsi dm_mirror dm_region_hash dm_log dm_mod [161846.962454] [161846.962460] CPU: 1 PID: 28016 Comm: kworker/u40:1 Not tainted 4.1.12-94.5.9.el6uek.x86_64 #2 [161846.962463] Hardware name: Oracle Corporation SUN SERVER X4-2L /ASSY,MB,X4-2L , BIOS 26050100 09/19/2017 [161846.962489] Workqueue: qlcnic_mailbox qlcnic_83xx_mailbox_worker [qlcnic] [161846.962493] task: ffff8801f2e34600 ti: ffff88004ca5c000 task.ti: ffff88004ca5c000 [161846.962496] RIP: e030:[] [] xen_hypercall_sched_op+0xa/0x20 [161846.962506] RSP: e02b:ffff880202e43388 EFLAGS: 00000206 [161846.962509] RAX: 0000000000000000 RBX: ffff8801f6996b70 RCX: ffffffff810013aa [161846.962511] RDX: ffff880202e433cc RSI: ffff880202e433b0 RDI: 0000000000000003 [161846.962513] RBP: ffff880202e433d0 R08: 0000000000000000 R09: ffff8801fe893200 [161846.962516] R10: ffff8801fe400538 R11: 0000000000000206 R12: ffff880202e4b000 [161846.962518] R13: 0000000000000050 R14: 0000000000000001 R15: 000000000000020d [161846.962528] FS: 0000000000000000(0000) GS:ffff880202e40000(0000) knlGS:ffff880202e40000 [161846.962531] CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [161846.962533] CR2: 0000000002612640 CR3: 00000001bb796000 CR4: 0000000000042660 [161846.962536] Stack: [161846.962538] ffff880202e43608 0000000000000000 ffffffff813f0442 ffff880202e433b0 [161846.962543] 0000000000000000 ffff880202e433cc ffffffff00000001 0000000000000000 [161846.962547] 00000009813f03d6 ffff880202e433e0 ffffffff813f0460 ffff880202e43440 [161846.962552] Call Trace: [161846.962555] [161846.962565] [] ? xen_poll_irq_timeout+0x42/0x50 [161846.962570] [] xen_poll_irq+0x10/0x20 [161846.962578] [] xen_lock_spinning+0xe2/0x110 [161846.962583] [] __raw_callee_save_xen_lock_spinning+0x11/0x20 [161846.962592] [] ? _raw_spin_lock+0x57/0x80 [161846.962609] [] qlcnic_83xx_enqueue_mbx_cmd+0x7c/0xe0 [qlcnic] [161846.962623] [] qlcnic_83xx_issue_cmd+0x58/0x210 [qlcnic] [161846.962636] [] qlcnic_83xx_sre_macaddr_change+0x162/0x1d0 [qlcnic] [161846.962649] [] qlcnic_83xx_change_l2_filter+0x2b/0x30 [qlcnic] [161846.962657] [] ? __skb_flow_dissect+0x18b/0x650 [161846.962670] [] qlcnic_send_filter+0x205/0x250 [qlcnic] [161846.962682] [] qlcnic_xmit_frame+0x547/0x7b0 [qlcnic] [161846.962691] [] xmit_one+0x82/0x1a0 [161846.962696] [] dev_hard_start_xmit+0x50/0xa0 [161846.962701] [] sch_direct_xmit+0x112/0x220 [161846.962706] [] __dev_queue_xmit+0x1df/0x5e0 [161846.962710] [] dev_queue_xmit_sk+0x13/0x20 [161846.962721] [] bond_dev_queue_xmit+0x35/0x80 [bonding] [161846.962729] [] __bond_start_xmit+0x1cb/0x210 [bonding] [161846.962736] [] bond_start_xmit+0x31/0x60 [bonding] [161846.962740] [] xmit_one+0x82/0x1a0 [161846.962745] [] dev_hard_start_xmit+0x50/0xa0 [161846.962749] [] __dev_queue_xmit+0x4ee/0x5e0 [161846.962754] [] dev_queue_xmit_sk+0x13/0x20 [161846.962760] [] vlan_dev_hard_start_xmit+0xb2/0x150 [8021q] [161846.962764] [] xmit_one+0x82/0x1a0 [161846.962769] [] dev_hard_start_xmit+0x50/0xa0 [161846.962773] [] __dev_queue_xmit+0x4ee/0x5e0 [161846.962777] [] dev_queue_xmit_sk+0x13/0x20 [161846.962789] [] br_dev_queue_push_xmit+0x54/0xa0 [bridge] [161846.962797] [] br_forward_finish+0x2f/0x90 [bridge] [161846.962807] [] ? ttwu_do_wakeup+0x1d/0x100 [161846.962811] [] ? __alloc_skb+0x8b/0x1f0 [161846.962818] [] __br_forward+0x8d/0x120 [bridge] [161846.962822] [] ? __kmalloc_reserve+0x3b/0xa0 [161846.962829] [] ? update_rq_runnable_avg+0xee/0x230 [161846.962836] [] br_forward+0x96/0xb0 [bridge] [161846.962845] [] br_handle_frame_finish+0x1ae/0x420 [bridge] [161846.962853] [] br_handle_frame+0x17f/0x260 [bridge] [161846.962862] [] ? br_handle_frame_finish+0x420/0x420 [bridge] [161846.962867] [] __netif_receive_skb_core+0x1f7/0x870 [161846.962872] [] __netif_receive_skb+0x22/0x70 [161846.962877] [] netif_receive_skb_internal+0x23/0x90 [161846.962884] [] ? xenvif_idx_release+0xea/0x100 [xen_netback] [161846.962889] [] ? _raw_spin_unlock_irqrestore+0x20/0x50 [161846.962893] [] netif_receive_skb_sk+0x24/0x90 [161846.962899] [] xenvif_tx_submit+0x2ca/0x3f0 [xen_netback] [161846.962906] [] xenvif_tx_action+0x9c/0xd0 [xen_netback] [161846.962915] [] xenvif_poll+0x35/0x70 [xen_netback] [161846.962920] [] napi_poll+0xcb/0x1e0 [161846.962925] [] net_rx_action+0x90/0x1c0 [161846.962931] [] __do_softirq+0x10a/0x350 [161846.962938] [] irq_exit+0x125/0x130 [161846.962943] [] xen_evtchn_do_upcall+0x39/0x50 [161846.962950] [] xen_do_hypervisor_callback+0x1e/0x40 [161846.962952] [161846.962959] [] ? _raw_spin_lock+0x4a/0x80 [161846.962964] [] ? _raw_spin_lock_irqsave+0x1e/0xa0 [161846.962978] [] ? qlcnic_83xx_mailbox_worker+0xb9/0x2a0 [qlcnic] [161846.962991] [] ? process_one_work+0x151/0x4b0 [161846.962995] [] ? check_events+0x12/0x20 [161846.963001] [] ? worker_thread+0x120/0x480 [161846.963005] [] ? __schedule+0x30b/0x890 [161846.963010] [] ? process_one_work+0x4b0/0x4b0 [161846.963015] [] ? process_one_work+0x4b0/0x4b0 [161846.963021] [] ? kthread+0xce/0xf0 [161846.963025] [] ? kthread_freezable_should_stop+0x70/0x70 [161846.963031] [] ? ret_from_fork+0x42/0x70 [161846.963035] [] ? kthread_freezable_should_stop+0x70/0x70 [161846.963037] Code: cc 51 41 53 b8 1c 00 00 00 0f 05 41 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc Signed-off-by: Junxiao Bi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bdbcd2b088a0..c3c28f0960e5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3849,7 +3849,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) struct list_head *head = &mbx->cmd_q; struct qlcnic_cmd_args *cmd = NULL; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); while (!list_empty(head)) { cmd = list_entry(head->next, struct qlcnic_cmd_args, list); @@ -3860,7 +3860,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) qlcnic_83xx_notify_cmd_completion(adapter, cmd); } - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); } static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) @@ -3896,12 +3896,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_del(&cmd->list); mbx->num_cmds--; - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); qlcnic_83xx_notify_cmd_completion(adapter, cmd); } @@ -3966,7 +3966,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, init_completion(&cmd->completion); cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_add_tail(&cmd->list, &mbx->cmd_q); mbx->num_cmds++; @@ -3974,7 +3974,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT; queue_work(mbx->work_q, &mbx->work); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return 0; } @@ -4070,15 +4070,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; spin_unlock_irqrestore(&mbx->aen_lock, flags); - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); if (list_empty(head)) { - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return; } cmd = list_entry(head->next, struct qlcnic_cmd_args, list); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); mbx_ops->encode_cmd(adapter, cmd); mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST); From 9f2f873d5a1c3f6b22b18db0e4c9f3a30164b8f0 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 30 Jan 2018 14:12:55 +0100 Subject: [PATCH 325/705] qmi_wwan: Add support for Quectel EP06 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0b91a56a2e57a5a370655b25d677ae0ebf8a2d0 ] The Quectel EP06 is a Cat. 6 LTE modem. It uses the same interface as the EC20/EC25 for QMI, and requires the same "set DTR"-quirk to work. Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index db65d9ad4488..e1e5e8438457 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -944,6 +944,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From 5db5cabbf09dd793bf42e65cea9abe66fad62c13 Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Wed, 31 Jan 2018 01:32:36 +0800 Subject: [PATCH 326/705] r8169: fix RTL8168EP take too long to complete driver initialization. [ Upstream commit 086ca23d03c0d2f4088f472386778d293e15c5f6 ] Driver check the wrong register bit in rtl_ocp_tx_cond() that keep driver waiting until timeout. Fix this by waiting for the right register bit. Signed-off-by: Chunhao Lin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 298b74ebc1e9..18e68c91e651 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1387,7 +1387,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) { void __iomem *ioaddr = tp->mmio_addr; - return RTL_R8(IBISR0) & 0x02; + return RTL_R8(IBISR0) & 0x20; } static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) @@ -1395,7 +1395,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000); RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); } From ee46a8614204e136e7ec6426e9c696bd21901405 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 26 Jan 2018 16:40:41 +0800 Subject: [PATCH 327/705] tcp: release sk_frag.page in tcp_disconnect [ Upstream commit 9b42d55a66d388e4dd5550107df051a9637564fc ] socket can be disconnected and gets transformed back to a listening socket, if sk_frag.page is not released, which will be cloned into a new socket by sk_clone_lock, but the reference count of this page is increased, lead to a use after free or double free issue Signed-off-by: Li RongQing Cc: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7efa6b062049..0d1a767db1bb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2316,6 +2316,12 @@ int tcp_disconnect(struct sock *sk, int flags) WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + sk->sk_error_report(sk); return err; } From 73adb3b74efd2809735d2063b8fdaaf762bdd071 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 25 Jan 2018 22:03:52 +0800 Subject: [PATCH 328/705] vhost_net: stop device during reset owner [ Upstream commit 4cd879515d686849eec5f718aeac62a70b067d82 ] We don't stop device before reset owner, this means we could try to serve any virtqueue kick before reset dev->worker. This will result a warn since the work was pending at llist during owner resetting. Fix this by stopping device during owner reset. Reported-by: syzbot+eb17c6162478cc50632c@syzkaller.appspotmail.com Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 96a0661011fd..e5b7652234fc 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1078,6 +1078,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: From b980f718f52561b58072e0e675320fd21d71b94a Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 31 Jan 2018 15:43:05 -0500 Subject: [PATCH 329/705] tcp_bbr: fix pacing_gain to always be unity when using lt_bw [ Upstream commit 3aff3b4b986e51bcf4ab249e5d48d39596e0df6a ] This commit fixes the pacing_gain to remain at BBR_UNIT (1.0) when using lt_bw and returning from the PROBE_RTT state to PROBE_BW. Previously, when using lt_bw, upon exiting PROBE_RTT and entering PROBE_BW the bbr_reset_probe_bw_mode() code could sometimes randomly end up with a cycle_idx of 0 and hence have bbr_advance_cycle_phase() set a pacing gain above 1.0. In such cases this would result in a pacing rate that is 1.25x higher than intended, potentially resulting in a high loss rate for a little while until we stop using the lt_bw a bit later. This commit is a stable candidate for kernels back as far as 4.9. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Reported-by: Beyers Cronje Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index e86a34fd5484..8ec60532be2b 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -452,7 +452,8 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -461,8 +462,7 @@ static void bbr_update_cycle_phase(struct sock *sk, { struct bbr *bbr = inet_csk_ca(sk); - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && - bbr_is_next_cycle_phase(sk, rs)) + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) bbr_advance_cycle_phase(sk); } From fa46d1437fabe9da3a3c084cef6ac310ecadb9f8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2018 16:02:22 +0100 Subject: [PATCH 330/705] cls_u32: add missing RCU annotation. [ Upstream commit 058a6c033488494a6b1477b05fe8e1a16e344462 ] In a couple of points of the control path, n->ht_down is currently accessed without the required RCU annotation. The accesses are safe, but sparse complaints. Since we already held the rtnl lock, let use rtnl_dereference(). Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs") Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers") Signed-off-by: Paolo Abeni Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_u32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ae83c3aec308..da574a16e7b3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -496,6 +496,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; struct tc_to_netdev offload; @@ -520,7 +521,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, offload.cls_u32->knode.sel = &n->sel; offload.cls_u32->knode.exts = &n->exts; if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; + offload.cls_u32->knode.link_handle = ht->handle; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -788,8 +789,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, struct tc_u_knode *n) { - struct tc_u_knode *new; + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tc_u32_sel *s = &n->sel; + struct tc_u_knode *new; new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); @@ -807,11 +809,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->fshift = n->fshift; new->res = n->res; new->flags = n->flags; - RCU_INIT_POINTER(new->ht_down, n->ht_down); + RCU_INIT_POINTER(new->ht_down, ht); /* bump reference count as long as we hold pointer to structure */ - if (new->ht_down) - new->ht_down->refcnt++; + if (ht) + ht->refcnt++; #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update From 5771415d24bf66f199a4a16d900d2c3afaaa6423 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Jan 2018 23:15:27 -0800 Subject: [PATCH 331/705] ipv6: Fix SO_REUSEPORT UDP socket with implicit sk_ipv6only [ Upstream commit 7ece54a60ee2ba7a386308cae73c790bd580589c ] If a sk_v6_rcv_saddr is !IPV6_ADDR_ANY and !IPV6_ADDR_MAPPED, it implicitly implies it is an ipv6only socket. However, in inet6_bind(), this addr_type checking and setting sk->sk_ipv6only to 1 are only done after sk->sk_prot->get_port(sk, snum) has been completed successfully. This inconsistency between sk_v6_rcv_saddr and sk_ipv6only confuses the 'get_port()'. In particular, when binding SO_REUSEPORT UDP sockets, udp_reuseport_add_sock(sk,...) is called. udp_reuseport_add_sock() checks "ipv6_only_sock(sk2) == ipv6_only_sock(sk)" before adding sk to sk2->sk_reuseport_cb. In this case, ipv6_only_sock(sk2) could be 1 while ipv6_only_sock(sk) is still 0 here. The end result is, reuseport_alloc(sk) is called instead of adding sk to the existing sk2->sk_reuseport_cb. It can be reproduced by binding two SO_REUSEPORT UDP sockets on an IPv6 address (!ANY and !MAPPED). Only one of the socket will receive packet. The fix is to set the implicit sk_ipv6only before calling get_port(). The original sk_ipv6only has to be saved such that it can be restored in case get_port() failed. The situation is similar to the inet_reset_saddr(sk) after get_port() has failed. Thanks to Calvin Owens who created an easy reproduction which leads to a fix. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5cad76f87536..421379014995 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -274,6 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; + bool saved_ipv6only; int addr_type = 0; int err = 0; @@ -378,19 +379,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; + saved_ipv6only = sk->sk_ipv6only; + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) + sk->sk_ipv6only = 1; + /* Make sure we are allowed to bind here. */ if ((snum || !inet->bind_address_no_port) && sk->sk_prot->get_port(sk, snum)) { + sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); err = -EADDRINUSE; goto out; } - if (addr_type != IPV6_ADDR_ANY) { + if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; - if (addr_type != IPV6_ADDR_MAPPED) - sk->sk_ipv6only = 1; - } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); From b671f40419bb0d59c5af69c17af5a86bc467a273 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2018 10:27:27 -0800 Subject: [PATCH 332/705] soreuseport: fix mem leak in reuseport_add_sock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4db428a7c9ab07e08783e0fcdc4ca0f555da0567 ] reuseport_add_sock() needs to deal with attaching a socket having its own sk_reuseport_cb, after a prior setsockopt(SO_ATTACH_REUSEPORT_?BPF) Without this fix, not only a WARN_ONCE() was issued, but we were also leaking memory. Thanks to sysbot and Eric Biggers for providing us nice C repros. ------------[ cut here ]------------ socket already in reuseport group WARNING: CPU: 0 PID: 3496 at net/core/sock_reuseport.c:119   reuseport_add_sock+0x742/0x9b0 net/core/sock_reuseport.c:117 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 3496 Comm: syzkaller869503 Not tainted 4.15.0-rc6+ #245 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS   Google 01/01/2011 Call Trace:   __dump_stack lib/dump_stack.c:17 [inline]   dump_stack+0x194/0x257 lib/dump_stack.c:53   panic+0x1e4/0x41c kernel/panic.c:183   __warn+0x1dc/0x200 kernel/panic.c:547   report_bug+0x211/0x2d0 lib/bug.c:184   fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178   fixup_bug arch/x86/kernel/traps.c:247 [inline]   do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296   do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315   invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:1079 Fixes: ef456144da8e ("soreuseport: define reuseport groups") Signed-off-by: Eric Dumazet Reported-by: syzbot+c0ea2226f77a42936bf7@syzkaller.appspotmail.com Acked-by: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock_reuseport.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 77f396b679ce..5dce4291f0ed 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -93,6 +93,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) return more_reuse; } +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + /** * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. @@ -101,7 +111,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) */ int reuseport_add_sock(struct sock *sk, struct sock *sk2) { - struct sock_reuseport *reuse; + struct sock_reuseport *old_reuse, *reuse; if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { int err = reuseport_alloc(sk2); @@ -112,10 +122,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "socket already in reuseport group"); + lockdep_is_held(&reuseport_lock)); + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if (old_reuse && old_reuse->num_socks != 1) { + spin_unlock_bh(&reuseport_lock); + return -EBUSY; + } if (reuse->num_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); @@ -133,19 +146,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_unlock_bh(&reuseport_lock); + if (old_reuse) + call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } -static void reuseport_free_rcu(struct rcu_head *head) -{ - struct sock_reuseport *reuse; - - reuse = container_of(head, struct sock_reuseport, rcu); - if (reuse->prog) - bpf_prog_destroy(reuse->prog); - kfree(reuse); -} - void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; From 0a9b2dec6c12e30895ab7478000420ff2e811dce Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 28 Sep 2017 16:58:26 -0500 Subject: [PATCH 333/705] x86/asm: Fix inline asm call constraints for GCC 4.4 commit 520a13c530aeb5f63e011d668c42db1af19ed349 upstream. The kernel test bot (run by Xiaolong Ye) reported that the following commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") is causing double faults in a kernel compiled with GCC 4.4. Linus subsequently diagnosed the crash pattern and the buggy commit and found that the issue is with this code: register unsigned int __asm_call_sp asm("esp"); #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) Even on a 64-bit kernel, it's using ESP instead of RSP. That causes GCC to produce the following bogus code: ffffffff8147461d: 89 e0 mov %esp,%eax ffffffff8147461f: 4c 89 f7 mov %r14,%rdi ffffffff81474622: 4c 89 fe mov %r15,%rsi ffffffff81474625: ba 20 00 00 00 mov $0x20,%edx ffffffff8147462a: 89 c4 mov %eax,%esp ffffffff8147462c: e8 bf 52 05 00 callq ffffffff814c98f0 Despite the absurdity of it backing up and restoring the stack pointer for no reason, the bug is actually the fact that it's only backing up and restoring the lower 32 bits of the stack pointer. The upper 32 bits are getting cleared out, corrupting the stack pointer. So change the '__asm_call_sp' register variable to be associated with the actual full-size stack pointer. This also requires changing the __ASM_SEL() macro to be based on the actual compiled arch size, rather than the CONFIG value, because CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso). Otherwise Clang fails to build the kernel because it complains about the use of a 64-bit register (RSP) in a 32-bit file. Reported-and-Bisected-and-Tested-by: kernel test robot Diagnosed-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dmitriy Vyukov Cc: LKP Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble Signed-off-by: Ingo Molnar Cc: Matthias Kaehlcke Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 00523524edbf..7bb29a416b77 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -11,10 +11,12 @@ # define __ASM_FORM_COMMA(x) " " #x "," #endif -#ifdef CONFIG_X86_32 +#ifndef __x86_64__ +/* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else +/* 64 bit */ # define __ASM_SEL(a,b) __ASM_FORM(b) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif From dd7b14c3e05ef49bc2062ccc18a703604063fe1d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 18 Dec 2016 17:44:13 +0100 Subject: [PATCH 334/705] x86/microcode/AMD: Do not load when running on a hypervisor commit a15a753539eca8ba243d576f02e7ca9c4b7d7042 upstream. Doing so is completely void of sense for multiple reasons so prevent it. Set dis_ucode_ldr to true and thus disable the microcode loader by default to address xen pv guests which execute the AP path but not the BSP path. By having it turned off by default, the APs won't run into the loader either. Also, check CPUID(1).ECX[31] which hypervisors set. Well almost, not the xen pv one. That one gets the aforementioned "fix". Also, improve the detection method by caching the final decision whether to continue loading in dis_ucode_ldr and do it once on the BSP. The APs then simply test that value. Signed-off-by: Borislav Petkov Tested-by: Juergen Gross Tested-by: Boris Ostrovsky Acked-by: Juergen Gross Link: http://lkml.kernel.org/r/20161218164414.9649-4-bp@alien8.de Signed-off-by: Thomas Gleixner Cc: Rolf Neugebauer Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/core.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5ce5155f0695..dc0b9f8763ad 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -43,7 +43,7 @@ #define MICROCODE_VERSION "2.01" static struct microcode_ops *microcode_ops; -static bool dis_ucode_ldr; +static bool dis_ucode_ldr = true; /* * Synchronization. @@ -73,6 +73,7 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; + u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -85,8 +86,23 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (cmdline_find_option_bool(cmdline, option)) - *res = true; + if (!have_cpuid_p()) + return *res; + + a = 1; + c = 0; + native_cpuid(&a, &b, &c, &d); + + /* + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not + * completely accurate as xen pv guests don't see that CPUID bit set but + * that's good enough as they don't land on the BSP path anyway. + */ + if (c & BIT(31)) + return *res; + + if (cmdline_find_option_bool(cmdline, option) <= 0) + *res = false; return *res; } @@ -118,9 +134,6 @@ void __init load_ucode_bsp(void) if (check_loader_disabled_bsp()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); @@ -154,9 +167,6 @@ void load_ucode_ap(void) if (check_loader_disabled_ap()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); From 113d22965c78a79210d4da2d455dc9bcff5e2fb6 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 15:56:28 -0500 Subject: [PATCH 335/705] media: soc_camera: soc_scale_crop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 5331aec1bf9c9da557668174e0a4bfcee39f1121 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/platform/soc_camera/soc_scale_crop.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/soc_camera/soc_scale_crop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c index f77252d6ccd3..d29c24854c2c 100644 --- a/drivers/media/platform/soc_camera/soc_scale_crop.c +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c @@ -418,3 +418,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd, mf->height = soc_camera_shift_scale(rect->height, shift, scale_v); } EXPORT_SYMBOL(soc_camera_calc_client_output); + +MODULE_DESCRIPTION("soc-camera scaling-cropping functions"); +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_LICENSE("GPL"); From 0a01ecbd23a9547b60b1a1d7e83f60704b176925 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 May 2017 11:27:52 +0200 Subject: [PATCH 336/705] b43: Add missing MODULE_FIRMWARE() commit 3c89a72ad80c64bdbd5ff851ee9c328a191f7e01 upstream. Some firmware entries were forgotten to be added via MODULE_FIRMWARE(), which may result in the non-functional state when the driver is loaded in initrd. Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1037344 Fixes: 15be8e89cdd9 ("b43: add more bcma cores") Signed-off-by: Takashi Iwai Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/b43/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index 6e5d9095b195..a635fc6b1722 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -71,8 +71,18 @@ MODULE_FIRMWARE("b43/ucode11.fw"); MODULE_FIRMWARE("b43/ucode13.fw"); MODULE_FIRMWARE("b43/ucode14.fw"); MODULE_FIRMWARE("b43/ucode15.fw"); +MODULE_FIRMWARE("b43/ucode16_lp.fw"); MODULE_FIRMWARE("b43/ucode16_mimo.fw"); +MODULE_FIRMWARE("b43/ucode24_lcn.fw"); +MODULE_FIRMWARE("b43/ucode25_lcn.fw"); +MODULE_FIRMWARE("b43/ucode25_mimo.fw"); +MODULE_FIRMWARE("b43/ucode26_mimo.fw"); +MODULE_FIRMWARE("b43/ucode29_mimo.fw"); +MODULE_FIRMWARE("b43/ucode33_lcn40.fw"); +MODULE_FIRMWARE("b43/ucode30_mimo.fw"); MODULE_FIRMWARE("b43/ucode5.fw"); +MODULE_FIRMWARE("b43/ucode40.fw"); +MODULE_FIRMWARE("b43/ucode42.fw"); MODULE_FIRMWARE("b43/ucode9.fw"); static int modparam_bad_frames_preempt; From 9692602ab850eec484d8cc5a740803d34f00016c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:48:18 +0100 Subject: [PATCH 337/705] KEYS: encrypted: fix buffer overread in valid_master_desc() commit 794b4bc292f5d31739d89c0202c54e7dc9bc3add upstream. With the 'encrypted' key type it was possible for userspace to provide a data blob ending with a master key description shorter than expected, e.g. 'keyctl add encrypted desc "new x" @s'. When validating such a master key description, validate_master_desc() could read beyond the end of the buffer. Fix this by using strncmp() instead of memcmp(). [Also clean up the code to deduplicate some logic.] Cc: Mimi Zohar Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Jin Qian Signed-off-by: Greg Kroah-Hartman --- security/keys/encrypted-keys/encrypted.c | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index a871159bf03c..ead2fd60244d 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -141,23 +141,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc) */ static int valid_master_desc(const char *new_desc, const char *orig_desc) { - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN)) - goto out; - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_USER_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN)) - goto out; - } else - goto out; + int prefix_len; + + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) + prefix_len = KEY_TRUSTED_PREFIX_LEN; + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) + prefix_len = KEY_USER_PREFIX_LEN; + else + return -EINVAL; + + if (!new_desc[prefix_len]) + return -EINVAL; + + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) + return -EINVAL; + return 0; -out: - return -EINVAL; } /* From 734e687d1d7bcddf2548eeb5a4935a186f452b69 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 22 Jan 2018 17:09:34 -0500 Subject: [PATCH 338/705] x86/retpoline: Remove the esp/rsp thunk (cherry picked from commit 1df37383a8aeabb9b418698f0bcdffea01f4b1b2) It doesn't make sense to have an indirect call thunk with esp/rsp as retpoline code won't work correctly with the stack pointer register. Removing it will help compiler writers to catch error in case such a thunk call is emitted incorrectly. Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Suggested-by: Jeff Law Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Tom Lendacky Cc: Kees Cook Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 1 - arch/x86/lib/retpoline.S | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index b15aa4083dfd..5a25ada75aeb 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -37,5 +37,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -INDIRECT_THUNK(sp) #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index dfb2ba91b670..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX) GENERATE_THUNK(_ASM_SI) GENERATE_THUNK(_ASM_DI) GENERATE_THUNK(_ASM_BP) -GENERATE_THUNK(_ASM_SP) #ifdef CONFIG_64BIT GENERATE_THUNK(r8) GENERATE_THUNK(r9) From fea3c9a54012227b30e73f5fb3d132c103c7aa84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:13 +0100 Subject: [PATCH 339/705] KVM: x86: Make indirect calls in emulator speculation safe (cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab) Replace the indirect calls with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org [dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6f5a3b076341..c8d573822e60 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "x86.h" #include "tss.h" @@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; call *%[fastop]" - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); + asm("push %[flags]; popf; " CALL_NOSPEC + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); return rc; } @@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) { - register void *__sp asm(_ASM_SP); ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; if (!(ctxt->d & ByteOp)) fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), "+r"(__sp) + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); From ec86a1dad0c00757ceb49e86b4c10dd7fbc380cb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:14 +0100 Subject: [PATCH 340/705] KVM: VMX: Make indirect call speculation safe (cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699) Replace indirect call with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 178a344f55f8..fcdcc8802677 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8676,14 +8676,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #endif "pushf\n\t" __ASM_SIZE(push) " $%c[cs]\n\t" - "call *%[entry]\n\t" + CALL_NOSPEC : #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif "+r"(__sp) : - [entry]"r"(entry), + THUNK_TARGET(entry), [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); From a1745ad92f50e95b6c2d101cd9b77253969ddccc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 25 Jan 2018 15:50:28 -0800 Subject: [PATCH 341/705] module/retpoline: Warn about missing retpoline in module (cherry picked from commit caf7501a1b4ec964190f31f9c3f163de252273b8) There's a risk that a kernel which has full retpoline mitigations becomes vulnerable when a module gets loaded that hasn't been compiled with the right compiler or the right option. To enable detection of that mismatch at module load time, add a module info string "retpoline" at build time when the module was compiled with retpoline support. This only covers compiled C source, but assembler source or prebuilt object files are not checked. If a retpoline enabled kernel detects a non retpoline protected module at load time, print a warning and report it in the sysfs vulnerability file. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: jeyu@kernel.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- include/linux/module.h | 9 +++++++++ kernel/module.c | 11 +++++++++++ scripts/mod/modpost.c | 9 +++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8cacf62ec458..4cea7d49b363 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -92,6 +93,19 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static bool spectre_v2_bad_module; + +#ifdef RETPOLINE +bool retpoline_module_ok(bool has_retpoline) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + + pr_err("System may be vunerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; +} +#endif static void __init spec2_print_if_insecure(const char *reason) { @@ -277,6 +291,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); + return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif diff --git a/include/linux/module.h b/include/linux/module.h index 0c3207d26ac0..d2224a09b4b5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef RETPOLINE +extern bool retpoline_module_ok(bool has_retpoline); +#else +static inline bool retpoline_module_ok(bool has_retpoline) +{ + return true; +} +#endif + #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { diff --git a/kernel/module.c b/kernel/module.c index 0e54d5bf0097..07bfb9971f2f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) } #endif /* CONFIG_LIVEPATCH */ +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) +{ + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) + return; + + pr_warn("%s: loading module not compiled with retpoline compiler.\n", + mod->name); +} + /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } + check_modinfo_retpoline(mod, info); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 845eb9b800f3..238db4ffd30c 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); } +/* Cannot check for assembler */ +static void add_retpoline(struct buffer *b) +{ + buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); + buf_printf(b, "#endif\n"); +} + static void add_staging_flag(struct buffer *b, const char *name) { static const char *staging_dir = "drivers/staging"; @@ -2474,6 +2482,7 @@ int main(int argc, char **argv) add_header(&buf, mod); add_intree_flag(&buf, !external_module); + add_retpoline(&buf); add_staging_flag(&buf, mod->name); err |= add_versions(&buf, mod); add_depends(&buf, mod, modules); From d3eba7744075dc55f367364b5ed2055b5e3d5687 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:09 +0000 Subject: [PATCH 342/705] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf (cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f) This is a pure feature bits leaf. There are two AVX512 feature bits in it already which were handled as scattered bits, and three more from this leaf are going to be added for speculation control features. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 10 ++++++---- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/scattered.c | 2 -- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9ea67a04ff4f..8c101579f535 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -28,6 +28,7 @@ enum cpuid_leafs CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, + CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8537a21acd8b..9d4a422d5598 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -12,7 +12,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -197,9 +197,7 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ @@ -295,6 +293,10 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 21c5ac15657b..1f8cca459c6c 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -59,6 +59,7 @@ #define DISABLED_MASK15 0 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fac9a5c0abe9..6847d85400a8 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -100,6 +100,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d198ae02f2b7..426727318ea2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -737,6 +737,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; + c->x86_capability[CPUID_7_EDX] = edx; } /* Extended state features: level 0x0000000d */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b0dd9aec183d..afbb52532791 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, From 40532f65cccc5056b50cf1ab07a9a41445b24aa8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:10 +0000 Subject: [PATCH 343/705] x86/cpufeatures: Add Intel feature bits for Speculation Control (cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61) Add three feature bits exposed by new microcode on Intel CPUs for speculation control. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9d4a422d5598..1f038882bf32 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -296,6 +296,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* * BUG word(s) From c26a6bea26b356fb3539cdf5b2e348a5e528aa7b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:11 +0000 Subject: [PATCH 344/705] x86/cpufeatures: Add AMD feature bits for Speculation Control (cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7) AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: Tom Lendacky Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1f038882bf32..c4d03e70086b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -258,6 +258,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ +#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ From af57d43c908fe9b06dc555b400eca68562262eca Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:12 +0000 Subject: [PATCH 345/705] x86/msr: Add definitions for new speculation control MSRs (cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410) Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b11c4c072df8..c768bc1550a1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -37,6 +37,13 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd @@ -50,6 +57,11 @@ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e From a8799fd14d9f7f385a5a5c86cde247caf4bb0320 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:13 +0000 Subject: [PATCH 346/705] x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown (cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621) Also, for CPUs which don't speculate at all, don't report that they're vulnerable to the Spectre variants either. Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it for now, even though that could be done with a simple comparison, on the assumption that we'll have more to add. Based on suggestions from Dave Hansen and Alan Cox. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Acked-by: Dave Hansen Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 426727318ea2..cfa026f323cf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -44,6 +44,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -838,6 +840,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initdata struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_CENTAUR, 5 }, + { X86_VENDOR_INTEL, 5 }, + { X86_VENDOR_NSC, 5 }, + { X86_VENDOR_ANY, 4 }, + {} +}; + +static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} +}; + +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = 0; + + if (x86_match_cpu(cpu_no_meltdown)) + return false; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return false; + + return true; +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -884,11 +921,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (c->x86_vendor != X86_VENDOR_AMD) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (!x86_match_cpu(cpu_no_speculation)) { + if (cpu_vulnerable_to_meltdown(c)) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + } fpu__init_system(c); From 6c5e49150a51b0210710246b61a972300a274dcd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:14 +0000 Subject: [PATCH 347/705] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes (cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035) This doesn't refuse to load the affected microcodes; it just refuses to use the Spectre v2 mitigation features if they're detected, by clearing the appropriate feature bits. The AMD CPUID bits are handled here too, because hypervisors *may* have been exposing those bits even on Intel chips, for fine-grained control of what's available. It is non-trivial to use x86_match_cpu() for this table because that doesn't handle steppings. And the approach taken in commit bd9240a18 almost made me lose my lunch. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/intel-family.h | 7 ++- arch/x86/kernel/cpu/intel.c | 66 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 34a46dc076d3..75b748a1deb8 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -12,6 +12,7 @@ */ #define INTEL_FAM6_CORE_YONAH 0x0E + #define INTEL_FAM6_CORE2_MEROM 0x0F #define INTEL_FAM6_CORE2_MEROM_L 0x16 #define INTEL_FAM6_CORE2_PENRYN 0x17 @@ -21,6 +22,7 @@ #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ #define INTEL_FAM6_NEHALEM_EP 0x1A #define INTEL_FAM6_NEHALEM_EX 0x2E + #define INTEL_FAM6_WESTMERE 0x25 #define INTEL_FAM6_WESTMERE_EP 0x2C #define INTEL_FAM6_WESTMERE_EX 0x2F @@ -36,9 +38,9 @@ #define INTEL_FAM6_HASWELL_GT3E 0x46 #define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_BROADWELL_GT3E 0x47 #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E @@ -57,9 +59,10 @@ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A /* Xeon Phi */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb03..4d23d787a217 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Early microcode releases for the Spectre v2 mitigation were broken. + * Information taken from; + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release + */ +struct sku_microcode { + u8 model; + u8 stepping; + u32 microcode; +}; +static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + /* Updated in the 20180108 release; blacklist until we know otherwise */ + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +}; + +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_mask == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_STIBP) || + cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || + cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_STIBP); + clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); + clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + } + /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * From 31fd9eda7f69e0be0d0410682fc0d4cd76fe3699 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:15 +0000 Subject: [PATCH 348/705] x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support (cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d) Expose indirect_branch_prediction_barrier() for use in subsequent patches. [ tglx: Add IBPB status to spectre_v2 sysfs file ] Co-developed-by: KarimAllah Ahmed Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ arch/x86/kernel/cpu/bugs.c | 10 +++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c4d03e70086b..390154576dad 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -202,6 +202,8 @@ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ +#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4ad41087ce0e..34e384c7208f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void) #endif } +static inline void indirect_branch_prediction_barrier(void) +{ + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +} + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4cea7d49b363..1c4b39de49bb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -262,6 +262,13 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Filling RSB on context switch\n"); } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { + setup_force_cpu_cap(X86_FEATURE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } } #undef pr_fmt @@ -291,7 +298,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif From 18bc71dff630283333ccea760efb398dbe282a72 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:37 +0100 Subject: [PATCH 349/705] x86/nospec: Fix header guards names (cherry picked from commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9) ... to adhere to the _ASM_X86_ naming scheme. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 34e384c7208f..865192a2cc31 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NOSPEC_BRANCH_H__ -#define __NOSPEC_BRANCH_H__ +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ +#define _ASM_X86_NOSPEC_BRANCH_H_ #include #include @@ -232,4 +232,4 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ -#endif /* __NOSPEC_BRANCH_H__ */ +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ From 557cbfa2221110761d6c26085c6df43d48425598 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:39 +0100 Subject: [PATCH 350/705] x86/bugs: Drop one "mitigation" from dmesg (cherry picked from commit 55fa19d3e51f33d9cd4056d25836d93abf9438db) Make [ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline into [ 0.031118] Spectre V2: Mitigation: Full generic retpoline to reduce the mitigation mitigations strings. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1c4b39de49bb..674ad4666f85 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -90,7 +90,7 @@ static const char *spectre_v2_strings[] = { }; #undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; static bool spectre_v2_bad_module; From 98911226d51ea50dd8cb33cfb5a90be39872401b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2018 15:45:14 +0100 Subject: [PATCH 351/705] x86/cpu/bugs: Make retpoline module warning conditional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02) If sysfs is disabled and RETPOLINE not defined: arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used [-Wunused-variable] static bool spectre_v2_bad_module; Hide it. Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module") Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: David Woodhouse Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 674ad4666f85..efe55c58a921 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -93,9 +93,10 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -static bool spectre_v2_bad_module; #ifdef RETPOLINE +static bool spectre_v2_bad_module; + bool retpoline_module_ok(bool has_retpoline) { if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) @@ -105,6 +106,13 @@ bool retpoline_module_ok(bool has_retpoline) spectre_v2_bad_module = true; return false; } + +static inline const char *spectre_v2_module_string(void) +{ + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; +} +#else +static inline const char *spectre_v2_module_string(void) { return ""; } #endif static void __init spec2_print_if_insecure(const char *reason) @@ -299,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", - spectre_v2_bad_module ? " - vulnerable module loaded" : ""); + boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + spectre_v2_module_string()); } #endif From 77b3b3ee238664df176ed44dd4658e578186c6d3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jan 2018 16:24:32 +0000 Subject: [PATCH 352/705] x86/cpufeatures: Clean up Spectre v2 related CPUID flags (cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2) We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", "ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them as the user-visible bits. When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP bit is set, set the AMD STIBP that's used for the generic hardware capability. Hide the rest from /proc/cpuinfo by putting "" in the comments. Including RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are patches to make the sysfs vulnerabilities information non-readable by non-root, and the same should apply to all information about which mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. The feature bit for whether IBPB is actually used, which is needed for ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. Originally-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 18 ++++++++-------- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 7 +++---- arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++--------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 390154576dad..8eb23f5cf7f4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,15 +194,15 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ -#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -260,9 +260,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ -#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ -#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -301,8 +301,8 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 865192a2cc31..19ecb5446b30 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -225,7 +225,7 @@ static inline void indirect_branch_prediction_barrier(void) "movl %[val], %%eax\n\t" "movl $0, %%edx\n\t" "wrmsr", - X86_FEATURE_IBPB) + X86_FEATURE_USE_IBPB) : : [msr] "i" (MSR_IA32_PRED_CMD), [val] "i" (PRED_CMD_IBPB) : "eax", "ecx", "edx", "memory"); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index efe55c58a921..3a06718257bf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -272,9 +272,8 @@ retpoline_auto: } /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || - boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { - setup_force_cpu_cap(X86_FEATURE_IBPB); + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Enabling Indirect Branch Prediction Barrier\n"); } } @@ -307,7 +306,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4d23d787a217..2e257f87c527 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -140,17 +140,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } - if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_STIBP) || - cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || - cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { - pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + + /* Now if any of them are set, check the blacklist and clear the lot */ + if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); + clear_cpu_cap(c, X86_FEATURE_IBRS); + clear_cpu_cap(c, X86_FEATURE_IBPB); clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); - clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); } /* From 77d1424d2fb84ba2d3ce60a0ec4c3913e2e59abd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 27 Jan 2018 16:24:33 +0000 Subject: [PATCH 353/705] x86/retpoline: Simplify vmexit_fill_RSB() (cherry picked from commit 1dde7415e99933bb7293d6b2843752cbdb43ec11) Simplify it to call an asm-function instead of pasting 41 insn bytes at every call site. Also, add alignment to the macro as suggested here: https://support.google.com/faqs/answer/7625886 [dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 ++ arch/x86/include/asm/nospec-branch.h | 70 +++------------------------ arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 56 +++++++++++++++++++++ 6 files changed, 71 insertions(+), 65 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a76dc738ec61..f5434b4670c1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -237,7 +237,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %ebx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e729e1528584..d8bc3e001df2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -435,7 +435,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %rbx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 5a25ada75aeb..166654218329 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -37,4 +37,7 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) +asmlinkage void __fill_rsb(void); +asmlinkage void __clear_rsb(void); + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ecb5446b30..df4ececa6ebf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,50 +7,6 @@ #include #include -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * We define a CPP macro such that it can be used from both .S files and - * inline assembly. It's possible to do a .macro and then include that - * from C via asm(".include ") but let's not go there. - */ - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -/* - * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - #ifdef __ASSEMBLY__ /* @@ -121,17 +77,10 @@ #endif .endm - /* - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP - * monstrosity above, manually. - */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +/* This clobbers the BX register */ +.macro FILL_RETURN_BUFFER nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr -.Lskip_rsb_\@: + ALTERNATIVE "", "call __clear_rsb", \ftr #endif .endm @@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); + alternative_input("", + "call __fill_rsb", + X86_FEATURE_RETPOLINE, + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 6bf1898ddf49..4ad7c4dd311c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_RETPOLINE) += retpoline.o +OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c909961e678a..480edc3a5e03 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,6 +7,7 @@ #include #include #include +#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -46,3 +47,58 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +.macro STUFF_RSB nr:req sp:req + mov $(\nr / 2), %_ASM_BX + .align 16 +771: + call 772f +773: /* speculation trap */ + pause + lfence + jmp 773b + .align 16 +772: + call 774f +775: /* speculation trap */ + pause + lfence + jmp 775b + .align 16 +774: + dec %_ASM_BX + jnz 771b + add $((BITS_PER_LONG/8) * \nr), \sp +.endm + +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +ENTRY(__fill_rsb) + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP + ret +END(__fill_rsb) +EXPORT_SYMBOL_GPL(__fill_rsb) + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +ENTRY(__clear_rsb) + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP + ret +END(__clear_rsb) +EXPORT_SYMBOL_GPL(__clear_rsb) From 9eedeb72c4127726b2f3d62b4c7a163f67d721ab Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 30 Jan 2018 14:13:50 +0800 Subject: [PATCH 354/705] x86/spectre: Check CONFIG_RETPOLINE in command line parser (cherry picked from commit 9471eee9186a46893726e22ebb54cade3f9bc043) The spectre_v2 option 'auto' does not check whether CONFIG_RETPOLINE is enabled. As a consequence it fails to emit the appropriate warning and sets feature flags which have no effect at all. Add the missing IS_ENABLED() check. Fixes: da285121560e ("x86/spectre: Add boot time option to select Spectre v2 mitigation") Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: Tomohiro Cc: dave.hansen@intel.com Cc: bp@alien8.de Cc: arjan@linux.intel.com Cc: dwmw@amazon.co.uk Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f5892721-7528-3647-08fb-f8d10e65ad87@cn.fujitsu.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3a06718257bf..51624c6f2a3b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -212,10 +212,10 @@ static void __init spectre_v2_select_mitigation(void) return; case SPECTRE_V2_CMD_FORCE: - /* FALLTRHU */ case SPECTRE_V2_CMD_AUTO: - goto retpoline_auto; - + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; case SPECTRE_V2_CMD_RETPOLINE_AMD: if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_amd; From d7f8d17406d62f0c8b20a9100d34d0e203557fe1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:49 -0800 Subject: [PATCH 355/705] x86/entry/64: Remove the SYSCALL64 fast path (cherry picked from commit 21d375b6b34ff511a507de27bf316b3dde6938d9) The SYCALLL64 fast path was a nice, if small, optimization back in the good old days when syscalls were actually reasonably fast. Now there is PTI to slow everything down, and indirect branches are verboten, making everything messier. The retpoline code in the fast path is particularly nasty. Just get rid of the fast path. The slow path is barely slower. [ tglx: Split out the 'push all extra regs' part ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Borislav Petkov Cc: Linus Torvalds Cc: Kernel Hardening Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 123 +----------------------------------- arch/x86/entry/syscall_64.c | 7 +- 2 files changed, 3 insertions(+), 127 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d8bc3e001df2..2c154c980543 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -179,94 +179,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r11 /* pt_regs->r11 */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - /* - * If we need to do entry work or if we guess we'll need to do - * exit work, go straight to the slow path. - */ - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz entry_SYSCALL64_slow_path - -entry_SYSCALL_64_fastpath: - /* - * Easy case: enable interrupts and issue the syscall. If the syscall - * needs pt_regs, we'll call a stub that disables interrupts again - * and jumps to the slow path. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx - - /* - * This call instruction is handled specially in stub_ptregs_64. - * It might end up jumping to the slow path. If it jumps, RAX - * and all argument registers are clobbered. - */ -#ifdef CONFIG_RETPOLINE - movq sys_call_table(, %rax, 8), %rax - call __x86_indirect_thunk_rax -#else - call *sys_call_table(, %rax, 8) -#endif -.Lentry_SYSCALL_64_after_fastpath_call: - - movq %rax, RAX(%rsp) -1: - - /* - * If we get here, then we know that pt_regs is clean for SYSRET64. - * If we see that no exit work is required (which we are required - * to check with IRQs off), then we can go straight to SYSRET64. - */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz 1f - - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON /* user mode is traced as IRQs on */ - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - RESTORE_C_REGS_EXCEPT_RCX_R11 - /* - * This opens a window where we have a user CR3, but are - * running in the kernel. This makes using the CS - * register useless for telling whether or not we need to - * switch CR3 in NMIs. Normal interrupts are OK because - * they are off here. - */ - SWITCH_USER_CR3 - movq RSP(%rsp), %rsp - USERGS_SYSRET64 - -1: - /* - * The fast path looked good when we started, but something changed - * along the way and we need to switch to the slow path. Calling - * raise(3) will trigger this, for example. IRQs are off. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_EXTRA_REGS - movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ - jmp return_from_SYSCALL_64 - -entry_SYSCALL64_slow_path: /* IRQs are off. */ SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ -return_from_SYSCALL_64: RESTORE_EXTRA_REGS TRACE_IRQS_IRETQ /* we're about to change IF */ @@ -339,6 +256,7 @@ return_from_SYSCALL_64: syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 + /* * This opens a window where we have a user CR3, but are * running in the kernel. This makes using the CS @@ -363,45 +281,6 @@ opportunistic_sysret_failed: jmp restore_c_regs_and_iret END(entry_SYSCALL_64) -ENTRY(stub_ptregs_64) - /* - * Syscalls marked as needing ptregs land here. - * If we are on the fast path, we need to save the extra regs, - * which we achieve by trying again on the slow path. If we are on - * the slow path, the extra regs are already saved. - * - * RAX stores a pointer to the C function implementing the syscall. - * IRQs are on. - */ - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) - jne 1f - - /* - * Called from fast path -- disable IRQs again, pop return address - * and jump to slow path - */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - popq %rax - jmp entry_SYSCALL64_slow_path - -1: - JMP_NOSPEC %rax /* Called from C */ -END(stub_ptregs_64) - -.macro ptregs_stub func -ENTRY(ptregs_\func) - leaq \func(%rip), %rax - jmp stub_ptregs_64 -END(ptregs_\func) -.endm - -/* Instantiate ptregs_stub for each ptregs-using syscall */ -#define __SYSCALL_64_QUAL_(sym) -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) -#include - /* * %rdi: prev task * %rsi: next task diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 9dbc5abb6162..6705edda4ac3 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -6,14 +6,11 @@ #include #include -#define __SYSCALL_64_QUAL_(sym) sym -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym - -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); From 572e509178112895f6162ead5d4bf39d2b981729 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:49 -0800 Subject: [PATCH 356/705] x86/entry/64: Push extra regs right away (cherry picked from commit d1f7732009e0549eedf8ea1db948dc37be77fd46) With the fast path removed there is no point in splitting the push of the normal and the extra register set. Just push the extra regs right away. [ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Borislav Petkov Cc: Linus Torvalds Cc: Kernel Hardening Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2c154c980543..db5009ce065a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -177,10 +177,14 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ /* IRQs are off. */ - SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ From f03d00ba0b478963fc96c975d3b27fc1b9bc3a43 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:50 -0800 Subject: [PATCH 357/705] x86/asm: Move 'status' from thread_struct to thread_info (cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3) The TS_COMPAT bit is very hot and is accessed from code paths that mostly also touch thread_info::flags. Move it into struct thread_info to improve cache locality. The only reason it was in thread_struct is that there was a brief period during which arch-specific fields were not allowed in struct thread_info. Linus suggested further changing: ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); to: if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); on the theory that frequently dirtying the cacheline even in pure 64-bit code that never needs to modify status hurts performance. That could be a reasonable followup patch, but I suspect it matters less on top of this patch. Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Kernel Hardening Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 4 ++-- arch/x86/include/asm/processor.h | 2 -- arch/x86/include/asm/syscall.h | 6 +++--- arch/x86/include/asm/thread_info.h | 3 ++- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bdd9cc59d20f..bd1d1027384f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -201,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -299,7 +299,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - current->thread.status |= TS_COMPAT; + ti->status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 353f038ec645..cb866ae1bc5d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -391,8 +391,6 @@ struct thread_struct { unsigned short gsindex; #endif - u32 status; /* thread synchronous flags */ - #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index e3c95e8e61c5..03eedc21246d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index bdf9c4c91572..89978b9c667a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -54,6 +54,7 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ }; #define INIT_THREAD_INFO(tsk) \ @@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, #define in_ia32_syscall() true #else #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ - current->thread.status & TS_COMPAT) + current_thread_info()->status & TS_COMPAT) #endif /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0887d2ae3797..dffe81d3c261 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -538,7 +538,7 @@ void set_personality_ia32(bool x32) current->personality &= ~READ_IMPLIES_EXEC; /* in_compat_syscall() uses the presence of the x32 syscall bit flag to determine compat status */ - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); @@ -546,7 +546,7 @@ void set_personality_ia32(bool x32) current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; } } EXPORT_SYMBOL_GPL(set_personality_ia32); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0e63c0267f99..e497d374412a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 763af1d0de64..b1a5d252d482 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI From 899ab2cf91389aa3e27c341a9858dc22985dae2f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 29 Jan 2018 17:02:16 -0800 Subject: [PATCH 358/705] Documentation: Document array_index_nospec (cherry picked from commit f84a56f73dddaeac1dba8045b007f742f61cd2da) Document the rationale and usage of the new array_index_nospec() helper. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: linux-arch@vger.kernel.org Cc: Jonathan Corbet Cc: Peter Zijlstra Cc: gregkh@linuxfoundation.org Cc: kernel-hardening@lists.openwall.com Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/speculation.txt | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 Documentation/speculation.txt diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt new file mode 100644 index 000000000000..e9e6cbae2841 --- /dev/null +++ b/Documentation/speculation.txt @@ -0,0 +1,90 @@ +This document explains potential effects of speculation, and how undesirable +effects can be mitigated portably using common APIs. + +=========== +Speculation +=========== + +To improve performance and minimize average latencies, many contemporary CPUs +employ speculative execution techniques such as branch prediction, performing +work which may be discarded at a later stage. + +Typically speculative execution cannot be observed from architectural state, +such as the contents of registers. However, in some cases it is possible to +observe its impact on microarchitectural state, such as the presence or +absence of data in caches. Such state may form side-channels which can be +observed to extract secret information. + +For example, in the presence of branch prediction, it is possible for bounds +checks to be ignored by code which is speculatively executed. Consider the +following code: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else + return array[index]; + } + +Which, on arm64, may be compiled to an assembly sequence such as: + + CMP , #MAX_ARRAY_ELEMS + B.LT less + MOV , #0 + RET + less: + LDR , [, ] + RET + +It is possible that a CPU mis-predicts the conditional branch, and +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This +value will subsequently be discarded, but the speculated load may affect +microarchitectural state which can be subsequently measured. + +More complex sequences involving multiple dependent memory accesses may +result in sensitive information being leaked. Consider the following +code, building on the prior example: + + int load_dependent_arrays(int *arr1, int *arr2, int index) + { + int val1, val2, + + val1 = load_array(arr1, index); + val2 = load_array(arr2, val1); + + return val2; + } + +Under speculation, the first call to load_array() may return the value +of an out-of-bounds address, while the second call will influence +microarchitectural state dependent on this value. This may provide an +arbitrary read primitive. + +==================================== +Mitigating speculation side-channels +==================================== + +The kernel provides a generic API to ensure that bounds checks are +respected even under speculation. Architectures which are affected by +speculation-based side-channels are expected to implement these +primitives. + +The array_index_nospec() helper in can be used to +prevent information from being leaked via side-channels. + +A call to array_index_nospec(index, size) returns a sanitized index +value that is bounded to [0, size) even under cpu speculation +conditions. + +This can be used to protect the earlier load_array() example: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else { + index = array_index_nospec(index, MAX_ARRAY_ELEMS); + return array[index]; + } + } From 579ef9ea20d67a80a0bc5d093259ff6e97087d59 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:22 -0800 Subject: [PATCH 359/705] array_index_nospec: Sanitize speculative array de-references (cherry picked from commit f3804203306e098dae9ca51540fcd5eb700d7f40) array_index_nospec() is proposed as a generic mechanism to mitigate against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary checks via speculative execution. The array_index_nospec() implementation is expected to be safe for current generation CPUs across multiple architectures (ARM, x86). Based on an original implementation by Linus Torvalds, tweaked to remove speculative flows by Alexei Starovoitov, and tweaked again by Linus to introduce an x86 assembly implementation for the mask generation. Co-developed-by: Linus Torvalds Co-developed-by: Alexei Starovoitov Suggested-by: Cyril Novikov Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 72 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/linux/nospec.h diff --git a/include/linux/nospec.h b/include/linux/nospec.h new file mode 100644 index 000000000000..b99bced39ac2 --- /dev/null +++ b/include/linux/nospec.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Linus Torvalds. All rights reserved. +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#ifndef _LINUX_NOSPEC_H +#define _LINUX_NOSPEC_H + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * When @index is out of bounds (@index >= @size), the sign bit will be + * set. Extend the sign bit to all bits and invert, giving a result of + * zero for an out of bounds index, or ~0 if within bounds [0, @size). + */ +#ifndef array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) + return 0; + + /* + * Always calculate and emit the mask even if the compiler + * thinks the mask is not needed. The compiler does not take + * into account the value of @index under speculation. + */ + OPTIMIZER_HIDE_VAR(index); + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); +} +#endif + +/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: + * + * if (index < size) { + * index = array_index_nospec(index, size); + * val = array[index]; + * } + * + * ...if the CPU speculates past the bounds check then + * array_index_nospec() will clamp the index within the range of [0, + * size). + */ +#define array_index_nospec(index, size) \ +({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ + \ + _i &= _mask; \ + _i; \ +}) +#endif /* _LINUX_NOSPEC_H */ From 8c33e2d23a6821cb7d608011c3d2f54accf4212c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:28 -0800 Subject: [PATCH 360/705] x86: Implement array_index_mask_nospec (cherry picked from commit babdde2698d482b6c0de1eab4f697cf5856c5859) array_index_nospec() uses a mask to sanitize user controllable array indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask otherwise. While the default array_index_mask_nospec() handles the carry-bit from the (index - size) result in software. The x86 array_index_mask_nospec() does the same, but the carry-bit is handled in the processor CF flag without conditional instructions in the control flow. Suggested-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index bfb28caf97b1..ca22173e42dc 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -23,6 +23,30 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"r"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else From 1f03d140e2f509ae27f21d229930e707436a07ac Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:33 -0800 Subject: [PATCH 361/705] x86: Introduce barrier_nospec (cherry picked from commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a) Rename the open coded form of this instruction sequence from rdtsc_ordered() into a generic barrier primitive, barrier_nospec(). One of the mitigations for Spectre variant1 vulnerabilities is to fence speculative execution after successfully validating a bounds check. I.e. force the result of a bounds check to resolve in the instruction pipeline to ensure speculative execution honors that result before potentially operating on out-of-bounds data. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 4 ++++ arch/x86/include/asm/msr.h | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index ca22173e42dc..857590390397 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -47,6 +47,10 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, /* Override the default implementation from linux/nospec.h. */ #define array_index_mask_nospec array_index_mask_nospec +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ + "lfence", X86_FEATURE_LFENCE_RDTSC) + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index b5fee97813cd..ed35b915b5c9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * that some other imaginary CPU is updating continuously with a * time stamp. */ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); + barrier_nospec(); return rdtsc(); } From e06d7bfb223e6babd7a7f8586e9c25b87272b841 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:39 -0800 Subject: [PATCH 362/705] x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec (cherry picked from commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd) For __get_user() paths, do not allow the kernel to speculate on the value of a user controlled pointer. In addition to the 'stac' instruction for Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the access_ok() result to resolve in the pipeline before the CPU might take any speculative action on the pointer value. Given the cost of 'stac' the speculation barrier is placed after 'stac' to hopefully overlap the cost of disabling SMAP with the cost of flushing the instruction pipeline. Since __get_user is a major kernel interface that deals with user controlled pointers, the __uaccess_begin_nospec() mechanism will prevent speculative execution past an access_ok() permission check. While speculative execution past access_ok() is not enough to lead to a kernel memory leak, it is a necessary precondition. To be clear, __uaccess_begin_nospec() is addressing a class of potential problems near __get_user() usages. Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used to protect __get_user(), pointer masking similar to array_index_nospec() will be used for get_user() since it incorporates a bounds check near the usage. uaccess_try_nospec provides the same mechanism for get_user_try. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index dead0f3921f3..43148457d255 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -123,6 +123,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current->thread.uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ From ae75f83e79e4b2de7981d34ddcceb54178098bea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:44 -0800 Subject: [PATCH 363/705] x86/usercopy: Replace open coded stac/clac with __uaccess_{begin, end} (cherry picked from commit b5c4ae4f35325d520b230bab6eb3310613b72ac1) In preparation for converting some __uaccess_begin() instances to __uacess_begin_nospec(), make sure all 'from user' uaccess paths are using the _begin(), _end() helpers rather than open-coded stac() and clac(). No functional changes. Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416438.33451.17309465232057176966.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/usercopy_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 3bc7baf2a711..9b5fa0f6964e 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,12 +570,12 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); From 065eae4be83d8d2bea325c11610ebaf836e1cebb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:49 -0800 Subject: [PATCH 364/705] x86/uaccess: Use __uaccess_begin_nospec() and uaccess_try_nospec (cherry picked from commit 304ec1b050310548db33063e567123fae8fd0301) Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. __uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the limit check is far away from the user pointer de-reference. In those cases a barrier_nospec() prevents speculation with a potential pointer to privileged memory. uaccess_try_nospec covers get_user_try. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 6 +++--- arch/x86/include/asm/uaccess_32.h | 12 ++++++------ arch/x86/include/asm/uaccess_64.h | 12 ++++++------ arch/x86/lib/usercopy_32.c | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 43148457d255..a8d85a687cf4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -437,7 +437,7 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -547,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -582,7 +582,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 7d3bdd1ed697..d6d245088dd5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; @@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, switch (n) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u8 *)to, from, 1, ret, 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u16 *)to, from, 2, ret, 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_size(*(u32 *)to, from, 4, ret, 4); __uaccess_end(); return ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 673059a109fe..6e5cc08134ba 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9b5fa0f6964e..5c06dbffc52f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -570,7 +570,7 @@ do { \ unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); From 398a39311c0b67ff1d886f9861ae5251f8a7cad4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:54 -0800 Subject: [PATCH 365/705] x86/get_user: Use pointer masking to limit speculation (cherry picked from commit c7f631cb07e7da06ac1d231ca178452339e32a94) Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. Unlike the __get_user() case get_user() includes the address limit check near the pointer de-reference. With that locality the speculation can be mitigated with pointer narrowing rather than a barrier, i.e. array_index_nospec(). Where the narrowing is performed by: cmp %limit, %ptr sbb %mask, %mask and %mask, %ptr With respect to speculation the value of %ptr is either less than %limit or NULL. Co-developed-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: Andy Lutomirski Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/getuser.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 37b62d412148..b12b214713a6 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,6 +39,8 @@ ENTRY(__get_user_1) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 1: movzbl (%_ASM_AX),%edx xor %eax,%eax @@ -53,6 +55,8 @@ ENTRY(__get_user_2) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax @@ -67,6 +71,8 @@ ENTRY(__get_user_4) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 3: movl -3(%_ASM_AX),%edx xor %eax,%eax @@ -82,6 +88,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movq -7(%_ASM_AX),%rdx xor %eax,%eax @@ -93,6 +101,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movl -7(%_ASM_AX),%edx 5: movl -3(%_ASM_AX),%ecx From c3193fd49f6f0a9b378379f8fd3a95f01172d477 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:59 -0800 Subject: [PATCH 366/705] x86/syscall: Sanitize syscall table de-references under speculation (cherry picked from commit 2fbd7af5af8665d18bcefae3e9700be07e22b681) The syscall table base is a user controlled function pointer in kernel space. Use array_index_nospec() to prevent any out of bounds speculation. While retpoline prevents speculating into a userspace directed target it does not stop the pointer de-reference, the concern is leaking memory relative to the syscall table base, by observing instruction cache behavior. Reported-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Andy Lutomirski Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727417984.33451.1216731042505722161.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bd1d1027384f..b0cd306dc527 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) * regs->orig_ax, which changes the behavior of some syscalls. */ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); + regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); } @@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) } if (likely(nr < IA32_NR_syscalls)) { + nr = array_index_nospec(nr, IA32_NR_syscalls); /* * It's possible that a 32-bit syscall implementation * takes a 64-bit parameter but nonetheless assumes that From c26ceec69576cb61157d2487812fb2776e125260 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:05 -0800 Subject: [PATCH 367/705] vfs, fdtable: Prevent bounds-check bypass via speculative execution (cherry picked from commit 56c30ba7b348b90484969054d561f711ba196507) 'fd' is a user controlled value that is used as a data dependency to read from the 'fdt->fd' array. In order to avoid potential leaks of kernel memory values, block speculative execution of the instruction stream that could issue reads based on an invalid 'file *' returned from __fcheck_files. Co-developed-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/fdtable.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 6e84b2cae6ad..442b54a14cbc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i { struct fdtable *fdt = rcu_dereference_raw(files->fdt); - if (fd < fdt->max_fds) + if (fd < fdt->max_fds) { + fd = array_index_nospec(fd, fdt->max_fds); return rcu_dereference_raw(fdt->fd[fd]); + } return NULL; } From 0781a50a30d37230147954a3dd15bbfcebfc2398 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:15 -0800 Subject: [PATCH 368/705] nl80211: Sanitize array index in parse_txq_params (cherry picked from commit 259d8c1e984318497c84eef547bbb6b1d9f4eb05) Wireless drivers rely on parse_txq_params to validate that txq_params->ac is less than NL80211_NUM_ACS by the time the low-level driver's ->conf_tx() handler is called. Use a new helper, array_index_nospec(), to sanitize txq_params->ac with respect to speculation. I.e. ensure that any speculation into ->conf_tx() handlers is done with a value of txq_params->ac that is within the bounds of [0, NL80211_NUM_ACS). Reported-by: Christian Lamparter Reported-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Acked-by: Johannes Berg Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: linux-wireless@vger.kernel.org Cc: torvalds@linux-foundation.org Cc: "David S. Miller" Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727419584.33451.7700736761686184303.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c626f679e1c8..91722e97cdd5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -2014,20 +2015,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { static int parse_txq_params(struct nlattr *tb[], struct ieee80211_txq_params *txq_params) { + u8 ac; + if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || !tb[NL80211_TXQ_ATTR_AIFS]) return -EINVAL; - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); - if (txq_params->ac >= NL80211_NUM_ACS) + if (ac >= NL80211_NUM_ACS) return -EINVAL; - + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); return 0; } From 359fde6bd0ec91c284641965e5c5c45a1fab6d51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:21 -0800 Subject: [PATCH 369/705] x86/spectre: Report get_user mitigation for spectre_v1 (cherry picked from commit edfbae53dab8348fca778531be9f4855d2ca0360) Reflect the presence of get_user(), __get_user(), and 'syscall' protections in sysfs. The expectation is that new and better tooling will allow the kernel to grow more usages of array_index_nospec(), for now, only claim mitigation for __user pointer de-references. Reported-by: Jiri Slaby Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727420158.33451.11658324346540434635.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 51624c6f2a3b..d4658e02ff93 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -296,7 +296,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, From f67e05d1506a24f329589c27952d776f977161b7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 30 Jan 2018 19:32:18 +0000 Subject: [PATCH 370/705] x86/spectre: Fix spelling mistake: "vunerable"-> "vulnerable" (cherry picked from commit e698dcdfcda41efd0984de539767b4cddd235f1e) Trivial fix to spelling mistake in pr_err error message text. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: kernel-janitors@vger.kernel.org Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180130193218.9271-1-colin.king@canonical.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4658e02ff93..aec7dafc1f13 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -102,7 +102,7 @@ bool retpoline_module_ok(bool has_retpoline) if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) return true; - pr_err("System may be vunerable to spectre v2\n"); + pr_err("System may be vulnerable to spectre v2\n"); spectre_v2_bad_module = true; return false; } From cda6b6074cc6f94ba4cb69a6c8928c1cd19fe291 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jan 2018 14:30:23 +0000 Subject: [PATCH 371/705] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel (cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b) Despite the fact that all the other code there seems to be doing it, just using set_cpu_cap() in early_intel_init() doesn't actually work. For CPUs with PKU support, setup_pku() calls get_cpu_cap() after c->c_init() has set those feature bits. That resets those bits back to what was queried from the hardware. Turning the bits off for bad microcode is easy to fix. That can just use setup_clear_cpu_cap() to force them off for all CPUs. I was less keen on forcing the feature bits *on* that way, just in case of inconsistencies. I appreciate that the kernel is going to get this utterly wrong if CPU features are not consistent, because it has already applied alternatives by the time secondary CPUs are brought up. But at least if setup_force_cpu_cap() isn't being used, we might have a chance of *detecting* the lack of the corresponding bit and either panicking or refusing to bring the offending CPU online. So ensure that the appropriate feature bits are set within get_cpu_cap() regardless of how many extra times it's called. Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 27 ++++++++------------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cfa026f323cf..60e537dac718 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -718,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) } } +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -812,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2e257f87c527..4097b43cba2d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -140,28 +140,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } - /* - * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. - */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - /* Now if any of them are set, check the blacklist and clear the lot */ - if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); - clear_cpu_cap(c, X86_FEATURE_IBRS); - clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); } /* From 7552556f65af7db6b2bad828beb4c633cb7d8533 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 30 Jan 2018 22:13:33 -0600 Subject: [PATCH 372/705] x86/paravirt: Remove 'noreplace-paravirt' cmdline option (cherry picked from commit 12c69f1e94c89d40696e83804dd2f0965b5250cd) The 'noreplace-paravirt' option disables paravirt patching, leaving the original pv indirect calls in place. That's highly incompatible with retpolines, unless we want to uglify paravirt even further and convert the paravirt calls to retpolines. As far as I can tell, the option doesn't seem to be useful for much other than introducing surprising corner cases and making the kernel vulnerable to Spectre v2. It was probably a debug option from the early paravirt days. So just remove it. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Andrea Arcangeli Cc: Peter Zijlstra Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: Tim Chen Cc: Rusty Russell Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jason Baron Cc: Paolo Bonzini Cc: Alok Kataria Cc: Arjan Van De Ven Cc: David Woodhouse Cc: Dan Williams Link: https://lkml.kernel.org/r/20180131041333.2x6blhxirc2kclrq@treble Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 -- arch/x86/kernel/alternative.c | 14 -------------- 2 files changed, 16 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c2667aa4634..466c039c622b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. norandmaps Don't use address space randomization. Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops - noreplace-smp [X86-32,SMP] Don't replace SMP instructions with UP alternatives diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 10d5a3d6affc..03b6e5c6cf23 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ @@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; From eb99bd6341cb3039d0b9d2b7f89d5f2ff98e9676 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 31 Jan 2018 17:47:03 -0800 Subject: [PATCH 373/705] x86/kvm: Update spectre-v1 mitigation (cherry picked from commit 085331dfc6bbe3501fb936e657331ca943827600) Commit 75f139aaf896 "KVM: x86: Add memory barrier on vmcs field lookup" added a raw 'asm("lfence");' to prevent a bounds check bypass of 'vmcs_field_to_offset_table'. The lfence can be avoided in this path by using the array_index_nospec() helper designed for these types of fixes. Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Andrew Honig Cc: kvm@vger.kernel.org Cc: Jim Mattson Link: https://lkml.kernel.org/r/151744959670.6342.3001723920950249067.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fcdcc8802677..feadff312131 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "kvm_cache_regs.h" #include "x86.h" @@ -856,21 +857,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { static inline short vmcs_field_to_offset(unsigned long field) { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); + unsigned short offset; - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + BUILD_BUG_ON(size > SHRT_MAX); + if (field >= size) return -ENOENT; - /* - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a - * generic mechanism. - */ - asm("lfence"); - - if (vmcs_field_to_offset_table[field] == 0) + field = array_index_nospec(field, size); + offset = vmcs_field_to_offset_table[field]; + if (offset == 0) return -ENOENT; - - return vmcs_field_to_offset_table[field]; + return offset; } static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) From fe4333893936daf7c02c3b8afc379bc600b5e286 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 Feb 2018 11:27:20 +0000 Subject: [PATCH 374/705] x86/retpoline: Avoid retpolines for built-in __init functions (cherry picked from commit 66f793099a636862a71c59d4a6ba91387b155e0c) There's no point in building init code with retpolines, since it runs before any potentially hostile userspace does. And before the retpoline is actually ALTERNATIVEd into place, for much of it. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/init.h b/include/linux/init.h index e30104ceb86d..8e346d1bd837 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -4,6 +4,13 @@ #include #include +/* Built-in __init functions needn't be compiled with retpoline */ +#if defined(RETPOLINE) && !defined(MODULE) +#define __noretpoline __attribute__((indirect_branch("keep"))) +#else +#define __noretpoline +#endif + /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -39,7 +46,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold notrace __latent_entropy +#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) From 961cb14c615d13a823963585dafdc19ee9e9ba85 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 11:27:21 +0000 Subject: [PATCH 375/705] x86/spectre: Simplify spectre_v2 command line parsing (cherry picked from commit 9005c6834c0ffdfe46afa76656bd9276cca864f6) [dwmw2: Use ARRAY_SIZE] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 84 +++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index aec7dafc1f13..957ad443b786 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -118,13 +118,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static void __init spec2_print_if_secure(const char *reason) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static inline bool retp_compiler(void) @@ -139,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { char arg[20]; - int ret; + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); - if (ret > 0) { - if (match_option(arg, ret, "off")) { - goto disable; - } else if (match_option(arg, ret, "on")) { - spec2_print_if_secure("force enabled on command line."); - return SPECTRE_V2_CMD_FORCE; - } else if (match_option(arg, ret, "retpoline")) { - spec2_print_if_insecure("retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE; - } else if (match_option(arg, ret, "retpoline,amd")) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec2_print_if_insecure("AMD retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_AMD; - } else if (match_option(arg, ret, "retpoline,generic")) { - spec2_print_if_insecure("generic retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; - } else if (match_option(arg, ret, "auto")) { + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } } - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; -disable: - spec2_print_if_insecure("disabled on command line."); - return SPECTRE_V2_CMD_NONE; + } + + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; } /* Check for Skylake-like CPUs (for RSB handling) */ From 4b234a253e52b1ae98449d390cd00f07b96ca7e7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 22:39:23 +0100 Subject: [PATCH 376/705] x86/pti: Mark constant arrays as __initconst (cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e) I'm seeing build failures from the two newly introduced arrays that are marked 'const' and '__initdata', which are mutually exclusive: arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init' arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init' The correct annotation is __initconst. Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Ricardo Neri Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Thomas Garnier Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 60e537dac718..08e89ed6aa87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -861,7 +861,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initdata struct x86_cpu_id cpu_no_speculation[] = { +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, @@ -874,7 +874,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { {} }; -static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { { X86_VENDOR_AMD }, {} }; From 34900390e96663cfe5c23e254baaf49664be8836 Mon Sep 17 00:00:00 2001 From: Darren Kenny Date: Fri, 2 Feb 2018 19:12:20 +0000 Subject: [PATCH 377/705] x86/speculation: Fix typo IBRS_ATT, which should be IBRS_ALL (cherry picked from commit af189c95a371b59f493dbe0f50c0a09724868881) Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit") Signed-off-by: Darren Kenny Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Tom Lendacky Cc: Andi Kleen Cc: Borislav Petkov Cc: Masami Hiramatsu Cc: Arjan van de Ven Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index df4ececa6ebf..300cc159b4a0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ATT *might* it be avoided. + * CPUs with IBRS_ALL *might* it be avoided. */ static inline void vmexit_fill_RSB(void) { From 19b1d4bdfe5ce543e54292fb1a45c7300398e6aa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Jan 2017 11:58:57 +0100 Subject: [PATCH 378/705] KVM: nVMX: kmap() can't fail commit 42cf014d38d8822cce63703a467e00f65d000952 upstream. kmap() can't fail, therefore it will always return a valid pointer. Let's just get rid of the unnecessary checks. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index feadff312131..0726a66c07ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4756,10 +4756,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) return 0; vapic_page = kmap(vmx->nested.virtual_apic_page); - if (!vapic_page) { - WARN_ON(1); - return -ENOMEM; - } __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -9584,11 +9580,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, if (!page) return false; msr_bitmap_l1 = (unsigned long *)kmap(page); - if (!msr_bitmap_l1) { - nested_release_page_clean(page); - WARN_ON(1); - return false; - } memset(msr_bitmap_l0, 0xff, PAGE_SIZE); From 1edccf20b9d82d318f0003ad67b8afed299ae93e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Jan 2017 11:58:58 +0100 Subject: [PATCH 379/705] KVM: nVMX: vmx_complete_nested_posted_interrupt() can't fail (cherry picked from commit 6342c50ad12e8ce0736e722184a7dbdea4a3477f) vmx_complete_nested_posted_interrupt() can't fail, let's turn it into a void function. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0726a66c07ab..91d17ab53e2b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4736,7 +4736,7 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; @@ -4747,13 +4747,13 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmx->nested.pi_pending) { vmx->nested.pi_pending = false; if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return 0; + return; max_irr = find_last_bit( (unsigned long *)vmx->nested.pi_desc->pir, 256); if (max_irr == 256) - return 0; + return; vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); @@ -4766,7 +4766,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } - return 0; } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -10482,7 +10481,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - return vmx_complete_nested_posted_interrupt(vcpu); + vmx_complete_nested_posted_interrupt(vcpu); + return 0; } static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) From b7649e1776706c28d837d423412c1763900e521e Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 1 Aug 2017 14:00:40 -0700 Subject: [PATCH 380/705] KVM: nVMX: mark vmcs12 pages dirty on L2 exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570) The host physical addresses of L1's Virtual APIC Page and Posted Interrupt descriptor are loaded into the VMCS02. The CPU may write to these pages via their host physical address while L2 is running, bypassing address-translation-based dirty tracking (e.g. EPT write protection). Mark them dirty on every exit from L2 to prevent them from getting out of sync with dirty tracking. Also mark the virtual APIC page and the posted interrupt descriptor dirty when KVM is virtualizing posted interrupt processing. Signed-off-by: David Matlack Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 91d17ab53e2b..e91deda4864e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4736,6 +4736,28 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4743,18 +4765,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); - - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -4766,6 +4785,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -8022,6 +8043,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU + * may write to these pages via their host physical address while + * L2 is running, bypassing any address-translation-based dirty + * tracking (e.g. EPT write protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; From 46e24dfc2dfe0062a77538698c84f13b24ce9b2c Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 27 Nov 2017 17:22:25 -0600 Subject: [PATCH 381/705] KVM: nVMX: Eliminate vmcs02 pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit de3a0021a60635de96aa92713c1a31a96747d72c) The potential performance advantages of a vmcs02 pool have never been realized. To simplify the code, eliminate the pool. Instead, a single vmcs02 is allocated per VCPU when the VCPU enters VMX operation. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Jim Mattson Signed-off-by: Mark Kanda Reviewed-by: Ameya More Reviewed-by: David Hildenbrand Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 146 +++++++-------------------------------------- 1 file changed, 23 insertions(+), 123 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e91deda4864e..44e40952c7fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -174,7 +174,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -208,7 +207,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -387,13 +386,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -420,15 +412,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -6677,94 +6669,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } -/* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_last_entry(&vmx->nested.vmcs02_pool, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - item->vmcs02.shadow_vmcs = NULL; - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified @@ -7078,6 +6982,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + vmx->nested.vmcs02.vmcs = alloc_vmcs(); + vmx->nested.vmcs02.shadow_vmcs = NULL; + if (!vmx->nested.vmcs02.vmcs) + goto out_vmcs02; + loaded_vmcs_init(&vmx->nested.vmcs02); + if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); @@ -7100,9 +7010,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -7120,6 +7027,9 @@ out_cached_vmcs12: free_page((unsigned long)vmx->nested.msr_bitmap); out_msr_bitmap: + free_loaded_vmcs(&vmx->nested.vmcs02); + +out_vmcs02: return -ENOMEM; } @@ -7205,7 +7115,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -7221,7 +7131,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -7255,8 +7165,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; @@ -8045,10 +7953,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) /* * The host physical addresses of some pages of guest memory - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU - * may write to these pages via their host physical address while - * L2 is running, bypassing any address-translation-based dirty - * tracking (e.g. EPT write protection). + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. @@ -10212,7 +10121,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; - struct loaded_vmcs *vmcs02; bool ia32e; u32 msr_entry_idx; @@ -10352,17 +10260,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; + vmx->loaded_vmcs = &vmx->nested.vmcs02; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; @@ -10877,10 +10781,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ From ff546f9d83d320bc81b72bd2ccd4ebae45d3d714 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jan 2018 12:16:15 +0100 Subject: [PATCH 382/705] KVM: VMX: introduce alloc_loaded_vmcs (cherry picked from commit f21f165ef922c2146cc5bdc620f542953c41714b) Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also allocate an MSR bitmap there. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 44e40952c7fc..da3cc7975556 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3522,11 +3522,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3545,6 +3540,22 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); + return 0; +} + static void free_kvm_area(void) { int cpu; @@ -6943,6 +6954,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) struct vmcs *shadow_vmcs; const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + int r; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -6982,11 +6994,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - vmx->nested.vmcs02.vmcs = alloc_vmcs(); - vmx->nested.vmcs02.shadow_vmcs = NULL; - if (!vmx->nested.vmcs02.vmcs) + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) goto out_vmcs02; - loaded_vmcs_init(&vmx->nested.vmcs02); if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = @@ -9107,17 +9117,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - vmx->loaded_vmcs->shadow_vmcs = NULL; - if (!vmx->loaded_vmcs->vmcs) - goto free_msrs; if (!vmm_exclusive) kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); - loaded_vmcs_init(vmx->loaded_vmcs); + err = alloc_loaded_vmcs(&vmx->vmcs01); if (!vmm_exclusive) kvm_cpu_vmxoff(); + if (err < 0) + goto free_msrs; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; From 6236b782eba37a028972bdfd654773ff2e283a22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Jan 2018 16:51:18 +0100 Subject: [PATCH 383/705] KVM: VMX: make MSR bitmaps per-VCPU (cherry picked from commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6) Place the MSR bitmap in struct loaded_vmcs, and update it in place every time the x2apic or APICv state can change. This is rare and the loop can handle 64 MSRs per iteration, in a similar fashion as nested_vmx_prepare_msr_bitmap. This prepares for choosing, on a per-VM basis, whether to intercept the SPEC_CTRL and PRED_CMD MSRs. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Suggested-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 316 +++++++++++++++++---------------------------- 1 file changed, 115 insertions(+), 201 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da3cc7975556..d772e88ae8f3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -110,6 +110,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ @@ -191,6 +199,7 @@ struct loaded_vmcs { struct vmcs *shadow_vmcs; int cpu; int launched; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -429,8 +438,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; - unsigned long *msr_bitmap; - struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -531,6 +538,7 @@ struct vcpu_vmx { unsigned long host_rsp; u8 fail; bool nmi_known_unmasked; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -902,6 +910,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -921,12 +930,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; -static unsigned long *vmx_msr_bitmap_legacy; -static unsigned long *vmx_msr_bitmap_longmode; -static unsigned long *vmx_msr_bitmap_legacy_x2apic; -static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; -static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2520,36 +2523,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; - else if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; - } - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2590,7 +2563,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -3537,6 +3510,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } @@ -3553,7 +3528,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs->shadow_vmcs = NULL; loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; } static void free_kvm_area(void) @@ -4562,10 +4548,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4599,8 +4583,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4634,6 +4618,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, } } +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. @@ -4680,58 +4673,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); + u8 mode = 0; + + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - if (apicv_active) { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); - } else { - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_R); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } } } -static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_R); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_R); - } -} + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; -static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) -{ - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, MSR_TYPE_W); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - msr, MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - msr, MSR_TYPE_W); - } + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(void) @@ -4976,7 +4979,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -5065,7 +5068,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -6396,7 +6399,7 @@ static void wakeup_handler(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6411,41 +6414,13 @@ static __init int hardware_setup(void) if (!vmx_io_bitmap_b) goto out; - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy) - goto out1; - - vmx_msr_bitmap_legacy_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic) - goto out2; - - vmx_msr_bitmap_legacy_x2apic_apicv_inactive = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) - goto out3; - - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode) - goto out4; - - vmx_msr_bitmap_longmode_x2apic = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic) - goto out5; - - vmx_msr_bitmap_longmode_x2apic_apicv_inactive = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) - goto out6; - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) - goto out7; + goto out1; vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmwrite_bitmap) - goto out8; + goto out2; memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); @@ -6454,12 +6429,9 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; - goto out9; + goto out3; } if (boot_cpu_has(X86_FEATURE_NX)) @@ -6516,47 +6488,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - /* - * enable_apicv && kvm_vcpu_apicv_active() - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - vmx_disable_intercept_msr_read_x2apic(msr, true); - - /* TMCCT */ - vmx_enable_intercept_msr_read_x2apic(0x839, true); - /* TPR */ - vmx_disable_intercept_msr_write_x2apic(0x808, true); - /* EOI */ - vmx_disable_intercept_msr_write_x2apic(0x80b, true); - /* SELF-IPI */ - vmx_disable_intercept_msr_write_x2apic(0x83f, true); - - /* - * (enable_apicv && !kvm_vcpu_apicv_active()) || - * !enable_apicv - */ - /* TPR */ - vmx_disable_intercept_msr_read_x2apic(0x808, false); - vmx_disable_intercept_msr_write_x2apic(0x808, false); - if (enable_ept) { kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, @@ -6602,22 +6535,10 @@ static __init int hardware_setup(void) return alloc_kvm_area(); -out9: - free_page((unsigned long)vmx_vmwrite_bitmap); -out8: - free_page((unsigned long)vmx_vmread_bitmap); -out7: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); -out6: - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); -out5: - free_page((unsigned long)vmx_msr_bitmap_longmode); -out4: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); out3: - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_vmwrite_bitmap); out2: - free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_vmread_bitmap); out1: free_page((unsigned long)vmx_io_bitmap_b); out: @@ -6628,12 +6549,6 @@ out: static __exit void hardware_unsetup(void) { - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); - free_page((unsigned long)vmx_msr_bitmap_legacy); - free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); @@ -6998,13 +6913,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; @@ -7034,9 +6942,6 @@ out_shadow_vmcs: kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); - -out_msr_bitmap: free_loaded_vmcs(&vmx->nested.vmcs02); out_vmcs02: @@ -7115,10 +7020,6 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); - if (vmx->nested.msr_bitmap) { - free_page((unsigned long)vmx->nested.msr_bitmap); - vmx->nested.msr_bitmap = NULL; - } if (enable_shadow_vmcs) { vmcs_clear(vmx->vmcs01.shadow_vmcs); free_vmcs(vmx->vmcs01.shadow_vmcs); @@ -8465,7 +8366,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -9085,6 +8986,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -9125,6 +9027,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err < 0) goto free_msrs; + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); @@ -9519,7 +9430,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) @@ -10034,6 +9945,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -10738,7 +10652,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) From 7013129a4034ea2168a0ccb32d7ddfefe9123333 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 1 Feb 2018 22:59:43 +0100 Subject: [PATCH 384/705] KVM/x86: Add IBPB support (cherry picked from commit 15d45071523d89b3fb7372e2135fbd72f6af9506) The Indirect Branch Predictor Barrier (IBPB) is an indirect branch control mechanism. It keeps earlier branches from influencing later ones. Unlike IBRS and STIBP, IBPB does not define a new mode of operation. It's a command that ensures predicted branch targets aren't used after the barrier. Although IBRS and IBPB are enumerated by the same CPUID enumeration, IBPB is very different. IBPB helps mitigate against three potential attacks: * Mitigate guests from being attacked by other guests. - This is addressed by issing IBPB when we do a guest switch. * Mitigate attacks from guest/ring3->host/ring3. These would require a IBPB during context switch in host, or after VMEXIT. The host process has two ways to mitigate - Either it can be compiled with retpoline - If its going through context switch, and has set !dumpable then there is a IBPB in that path. (Tim's patch: https://patchwork.kernel.org/patch/10192871) - The case where after a VMEXIT you return back to Qemu might make Qemu attackable from guest when Qemu isn't compiled with retpoline. There are issues reported when doing IBPB on every VMEXIT that resulted in some tsc calibration woes in guest. * Mitigate guest/ring0->host/ring0 attacks. When host kernel is using retpoline it is safe against these attacks. If host kernel isn't using retpoline we might need to do a IBPB flush on every VMEXIT. Even when using retpoline for indirect calls, in certain conditions 'ret' can use the BTB on Skylake-era CPUs. There are other mitigations available like RSB stuffing/clearing. * IBPB is issued only for SVM during svm_free_vcpu(). VMX has a vmclear and SVM doesn't. Follow discussion here: https://lkml.org/lkml/2018/1/15/146 Please refer to the following spec for more details on the enumeration and control. Refer here to get documentation about mitigations. https://software.intel.com/en-us/side-channel-security-support [peterz: rebase and changelog rewrite] [karahmed: - rebase - vmx: expose PRED_CMD if guest has it in CPUID - svm: only pass through IBPB if guest has it in CPUID - vmx: support !cpu_has_vmx_msr_bitmap()] - vmx: support nested] [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS) PRED_CMD is a write-only MSR] Signed-off-by: Ashok Raj Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: David Woodhouse Signed-off-by: KarimAllah Ahmed Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: kvm@vger.kernel.org Cc: Asit Mallick Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Dave Hansen Cc: Arjan Van De Ven Cc: Greg KH Cc: Jun Nakajima Cc: Paolo Bonzini Cc: Dan Williams Cc: Tim Chen Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 11 +++++- arch/x86/kvm/cpuid.h | 12 +++++++ arch/x86/kvm/svm.c | 28 ++++++++++++++++ arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 127 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 91af75e37306..6f24483b2b84 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(IBPB); + /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -607,7 +611,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* IBPB isn't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9368fecca3ee..ec4f9dc54c70 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -160,6 +160,18 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_RDTSCP)); } +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_IBPB))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 24af898fb3a6..6de8d6510dbe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -248,6 +248,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -510,6 +511,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -1644,11 +1646,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. + */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1677,6 +1685,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (static_cpu_has(X86_FEATURE_RDTSCP)) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } avic_vcpu_load(vcpu, cpu); } @@ -3599,6 +3611,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; case MSR_STAR: svm->vmcb->save.star = data; break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d772e88ae8f3..b58b7819e890 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -550,6 +550,7 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -911,6 +912,8 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1846,6 +1849,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2255,6 +2281,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (!already_loaded) { @@ -3056,6 +3083,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -9431,9 +9485,23 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd is trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - /* This shortcut is ok because we support only x2APIC MSRs so far. */ - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); @@ -9466,6 +9534,13 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, MSR_TYPE_W); } } + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); nested_release_page_clean(page); From 755502f810c646a1d828fbc66364c252430b6064 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:44 +0100 Subject: [PATCH 385/705] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES (cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd) Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the contents will come directly from the hardware, but user-space can still override it. [dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Reviewed-by: Darren Kenny Reviewed-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Dan Williams Cc: Tim Chen Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 +++++++- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/vmx.c | 15 +++++++++++++++ arch/x86/kvm/x86.c | 1 + 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6f24483b2b84..9c6493f82094 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(ARCH_CAPABILITIES); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); } else { entry->ebx = 0; entry->ecx = 0; + entry->edx = 0; } entry->eax = 0; - entry->edx = 0; break; } case 9: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index ec4f9dc54c70..8719997dd2e4 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -171,6 +171,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); } +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); +} + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b58b7819e890..1bd0caf87c94 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -551,6 +551,8 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif + u64 arch_capabilities; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -2979,6 +2981,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3110,6 +3118,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -5196,6 +5209,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e023ef981feb..94d1573a717b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; From e5a83419c957edff9290a7e09b951f44af7fa2e2 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:45 +0100 Subject: [PATCH 386/705] KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL (cherry picked from commit d28b387fb74da95d69d2615732f50cceb38e9a4d) [ Based on a patch from Ashok Raj ] Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only start saving and restoring when a non-zero is written to it. No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest. [dwmw2: Clean up CPUID bits, save/restore manually, handle reset] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Jim Mattson Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Tim Chen Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Paolo Bonzini Cc: Dan Williams Cc: Linus Torvalds Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 ++-- arch/x86/kvm/cpuid.h | 11 +++++ arch/x86/kvm/vmx.c | 103 ++++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 2 +- 4 files changed, 118 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9c6493f82094..93f924de06cf 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB); + F(IBPB) | F(IBRS); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(ARCH_CAPABILITIES); + F(SPEC_CTRL) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -618,9 +618,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBPB isn't necessarily present in hardware cpuid */ + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ if (boot_cpu_has(X86_FEATURE_IBPB)) entry->ebx |= F(IBPB); + if (boot_cpu_has(X86_FEATURE_IBRS)) + entry->ebx |= F(IBRS); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 8719997dd2e4..d1beb7156704 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -171,6 +171,17 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); } +static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) + return true; + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); +} + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1bd0caf87c94..d49da86e3099 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -552,6 +552,7 @@ struct vcpu_vmx { #endif u64 arch_capabilities; + u64 spec_ctrl; u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; @@ -1851,6 +1852,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + /* * Check if MSR is intercepted for L01 MSR bitmap. */ @@ -2981,6 +3005,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; case MSR_IA32_ARCH_CAPABILITIES: if (!msr_info->host_initiated && !guest_cpuid_has_arch_capabilities(vcpu)) @@ -3091,6 +3122,36 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has_ibpb(vcpu)) @@ -5239,6 +5300,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->soft_vnmi_blocked = 0; @@ -8824,6 +8886,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_arm_hv_timer(vcpu); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -8942,6 +9013,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -9501,7 +9593,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* - * pred_cmd is trying to verify two things: + * pred_cmd & spec_ctrl are trying to verify two things: * * 1. L0 gave a permission to L1 to actually passthrough the MSR. This * ensures that we do not accidentally generate an L02 MSR bitmap @@ -9514,9 +9606,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, * the MSR. */ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && - !pred_cmd) + !pred_cmd && !spec_ctrl) return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); @@ -9550,6 +9643,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, } } + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + if (pred_cmd) nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 94d1573a717b..75f756eac979 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,7 +975,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_ARCH_CAPABILITIES + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; From fc00dde96099a1a331a683a87b23ccb4626d816a Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 3 Feb 2018 15:56:23 +0100 Subject: [PATCH 387/705] KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL (cherry picked from commit b2ac58f90540e39324e7a29a7ad471407ae0bf48) [ Based on a patch from Paolo Bonzini ] ... basically doing exactly what we do for VMX: - Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) - Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest actually used it. Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Tim Chen Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Paolo Bonzini Cc: Dan Williams Cc: Linus Torvalds Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6de8d6510dbe..be644afab1bb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -183,6 +183,8 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -248,6 +250,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, @@ -863,6 +866,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1537,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -3520,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3611,6 +3642,33 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_ibrs(vcpu)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && !guest_cpuid_has_ibpb(vcpu)) @@ -4854,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -4946,6 +5013,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); From 7c17a1e5852a0c1c67a2ac65569270619fe1d576 Mon Sep 17 00:00:00 2001 From: Robert Baronescu Date: Tue, 10 Oct 2017 13:21:59 +0300 Subject: [PATCH 388/705] crypto: tcrypt - fix S/G table for test_aead_speed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c6ac1d4f8fbdbed65dbeb8cf149d736409d16a1 upstream. In case buffer length is a multiple of PAGE_SIZE, the S/G table is incorrectly generated. Fix this by handling buflen = k * PAGE_SIZE separately. Signed-off-by: Robert Baronescu Signed-off-by: Herbert Xu Signed-off-by: Horia Geantă Signed-off-by: Greg Kroah-Hartman --- crypto/tcrypt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e3af318af2db..2a07341aca46 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -223,11 +223,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], } sg_init_table(sg, np + 1); - np--; + if (rem) + np--; for (k = 0; k < np; k++) sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE); - sg_set_buf(&sg[k + 1], xbuf[k], rem); + if (rem) + sg_set_buf(&sg[k + 1], xbuf[k], rem); } static void test_aead_speed(const char *algo, int enc, unsigned int secs, From a7de0e9718c3062ed5fad916ceb65f387a29447b Mon Sep 17 00:00:00 2001 From: Julian Scheel Date: Wed, 24 May 2017 12:28:23 +0200 Subject: [PATCH 389/705] ASoC: simple-card: Fix misleading error message commit 7ac45d1635a4cd2e99a4b11903d4a2815ca1b27b upstream. In case cpu could not be found the error message would always refer to /codec/ not being found in DT. Fix this by catching the cpu node not found case explicitly. Signed-off-by: Julian Scheel Signed-off-by: Mark Brown Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- sound/soc/generic/simple-card.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index f608f8d23f3d..dd88c2cb6470 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -232,13 +232,19 @@ static int asoc_simple_card_dai_link_of(struct device_node *node, snprintf(prop, sizeof(prop), "%scpu", prefix); cpu = of_get_child_by_name(node, prop); + if (!cpu) { + ret = -EINVAL; + dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); + goto dai_link_of_err; + } + snprintf(prop, sizeof(prop), "%splat", prefix); plat = of_get_child_by_name(node, prop); snprintf(prop, sizeof(prop), "%scodec", prefix); codec = of_get_child_by_name(node, prop); - if (!cpu || !codec) { + if (!codec) { ret = -EINVAL; dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); goto dai_link_of_err; From 24978c21f7ed183e7bb9745a295d066483b6bd48 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 May 2017 01:48:24 +0000 Subject: [PATCH 390/705] ASoC: rsnd: don't call free_irq() on Parent SSI commit 1f8754d4daea5f257370a52a30fcb22798c54516 upstream. If SSI uses shared pin, some SSI will be used as parent SSI. Then, normal SSI's remove and Parent SSI's remove (these are same SSI) will be called when unbind or remove timing. In this case, free_irq() will be called twice. This patch solve this issue. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Reported-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/ssi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 560cf4b51a99..071fa850fed5 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -699,9 +699,14 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); struct device *dev = rsnd_priv_to_dev(priv); int irq = ssi->irq; + /* Do nothing for SSI parent mod */ + if (ssi_parent_mod == mod) + return 0; + /* PIO will request IRQ again */ devm_free_irq(dev, irq, mod); From 1cb145c67260edf54f106f031756a6ae780b7a32 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 9 Aug 2017 02:16:20 +0000 Subject: [PATCH 391/705] ASoC: rsnd: avoid duplicate free_irq() commit e0936c3471a8411a5df327641fa3ffe12a2fb07b upstream. commit 1f8754d4daea5f ("ASoC: rsnd: don't call free_irq() on Parent SSI") fixed Parent SSI duplicate free_irq(). But on Renesas Sound, not only Parent SSI but also Multi SSI have same issue. This patch avoid duplicate free_irq() if it was not pure SSI. Fixes: 1f8754d4daea5f ("ASoC: rsnd: don't call free_irq() on Parent SSI") Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/ssi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 071fa850fed5..a9a43acce30e 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -699,12 +699,12 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); - struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); + struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io); struct device *dev = rsnd_priv_to_dev(priv); int irq = ssi->irq; - /* Do nothing for SSI parent mod */ - if (ssi_parent_mod == mod) + /* Do nothing if non SSI (= SSI parent, multi SSI) mod */ + if (pure_ssi_mod != mod) return 0; /* PIO will request IRQ again */ From 758e22acf4fd9aaf51c7bae93a47401b8f792d56 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Jul 2017 23:46:39 +0300 Subject: [PATCH 392/705] drm: rcar-du: Use the VBK interrupt for vblank events commit cbbb90b0c084d7dfb2ed8e3fecf8df200fbdd2a0 upstream. When implementing support for interlaced modes, the driver switched from reporting vblank events on the vertical blanking (VBK) interrupt to the frame end interrupt (FRM). This incorrectly divided the reported refresh rate by two. Fix it by moving back to the VBK interrupt. Fixes: 906eff7fcada ("drm: rcar-du: Implement support for interlaced modes") Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index a2ec6d8796a0..848f7f2152ee 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -551,7 +551,7 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) status = rcar_du_crtc_read(rcrtc, DSSR); rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); - if (status & DSSR_FRM) { + if (status & DSSR_VBK) { drm_crtc_handle_vblank(&rcrtc->crtc); rcar_du_crtc_finish_page_flip(rcrtc); ret = IRQ_HANDLED; From 230ca8fb951528f298e06a9c15257df5df85ecbd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 29 Jul 2017 02:31:33 +0300 Subject: [PATCH 393/705] drm: rcar-du: Fix race condition when disabling planes at CRTC stop commit 641307df71fe77d7b38a477067495ede05d47295 upstream. When stopping the CRTC the driver must disable all planes and wait for the change to take effect at the next vblank. Merely calling drm_crtc_wait_one_vblank() is not enough, as the function doesn't include any mechanism to handle the race with vblank interrupts. Replace the drm_crtc_wait_one_vblank() call with a manual mechanism that handles the vblank interrupt race. Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: thongsyho Signed-off-by: Nhan Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 53 +++++++++++++++++++++++--- drivers/gpu/drm/rcar-du/rcar_du_crtc.h | 8 ++++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 848f7f2152ee..3322b157106d 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -392,6 +392,31 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc) rcrtc->started = true; } +static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc) +{ + struct rcar_du_device *rcdu = rcrtc->group->dev; + struct drm_crtc *crtc = &rcrtc->crtc; + u32 status; + /* Make sure vblank interrupts are enabled. */ + drm_crtc_vblank_get(crtc); + /* + * Disable planes and calculate how many vertical blanking interrupts we + * have to wait for. If a vertical blanking interrupt has been triggered + * but not processed yet, we don't know whether it occurred before or + * after the planes got disabled. We thus have to wait for two vblank + * interrupts in that case. + */ + spin_lock_irq(&rcrtc->vblank_lock); + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); + status = rcar_du_crtc_read(rcrtc, DSSR); + rcrtc->vblank_count = status & DSSR_VBK ? 2 : 1; + spin_unlock_irq(&rcrtc->vblank_lock); + if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0, + msecs_to_jiffies(100))) + dev_warn(rcdu->dev, "vertical blanking timeout\n"); + drm_crtc_vblank_put(crtc); +} + static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) { struct drm_crtc *crtc = &rcrtc->crtc; @@ -400,17 +425,16 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) return; /* Disable all planes and wait for the change to take effect. This is - * required as the DSnPR registers are updated on vblank, and no vblank - * will occur once the CRTC is stopped. Disabling planes when starting - * the CRTC thus wouldn't be enough as it would start scanning out - * immediately from old frame buffers until the next vblank. + * required as the plane enable registers are updated on vblank, and no + * vblank will occur once the CRTC is stopped. Disabling planes when + * starting the CRTC thus wouldn't be enough as it would start scanning + * out immediately from old frame buffers until the next vblank. * * This increases the CRTC stop delay, especially when multiple CRTCs * are stopped in one operation as we now wait for one vblank per CRTC. * Whether this can be improved needs to be researched. */ - rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); - drm_crtc_wait_one_vblank(crtc); + rcar_du_crtc_disable_planes(rcrtc); /* Disable vertical blanking interrupt reporting. We first need to wait * for page flip completion before stopping the CRTC as userspace @@ -548,9 +572,24 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) irqreturn_t ret = IRQ_NONE; u32 status; + spin_lock(&rcrtc->vblank_lock); + status = rcar_du_crtc_read(rcrtc, DSSR); rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); + if (status & DSSR_VBK) { + /* + * Wake up the vblank wait if the counter reaches 0. This must + * be protected by the vblank_lock to avoid races in + * rcar_du_crtc_disable_planes(). + */ + if (rcrtc->vblank_count) { + if (--rcrtc->vblank_count == 0) + wake_up(&rcrtc->vblank_wait); + } + } + spin_unlock(&rcrtc->vblank_lock); + if (status & DSSR_VBK) { drm_crtc_handle_vblank(&rcrtc->crtc); rcar_du_crtc_finish_page_flip(rcrtc); @@ -606,6 +645,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } init_waitqueue_head(&rcrtc->flip_wait); + init_waitqueue_head(&rcrtc->vblank_wait); + spin_lock_init(&rcrtc->vblank_lock); rcrtc->group = rgrp; rcrtc->mmio_offset = mmio_offsets[index]; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index 6f08b7e7db06..48bef05b4c62 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -15,6 +15,7 @@ #define __RCAR_DU_CRTC_H__ #include +#include #include #include @@ -33,6 +34,9 @@ struct rcar_du_vsp; * @started: whether the CRTC has been started and is running * @event: event to post when the pending page flip completes * @flip_wait: wait queue used to signal page flip completion + * @vblank_lock: protects vblank_wait and vblank_count + * @vblank_wait: wait queue used to signal vertical blanking + * @vblank_count: number of vertical blanking interrupts to wait for * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC * @group: CRTC group this CRTC belongs to */ @@ -48,6 +52,10 @@ struct rcar_du_crtc { struct drm_pending_vblank_event *event; wait_queue_head_t flip_wait; + spinlock_t vblank_lock; + wait_queue_head_t vblank_wait; + unsigned int vblank_count; + unsigned int outputs; struct rcar_du_group *group; From 2760f452a71899af860355abe818a8b2bd32b2cb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Oct 2017 13:23:16 +0200 Subject: [PATCH 394/705] x86/microcode: Do the family check first commit 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 upstream with adjustments. On CPUs like AMD's Geode, for example, we shouldn't even try to load microcode because they do not support the modern microcode loading interface. However, we do the family check *after* the other checks whether the loader has been disabled on the command line or whether we're running in a guest. So move the family checks first in order to exit early if we're being loaded on an unsupported family. Reported-and-tested-by: Sven Glodowski Signed-off-by: Borislav Petkov Cc: # 4.11.. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://bugzilla.suse.com/show_bug.cgi?id=1061396 Link: http://lkml.kernel.org/r/20171012112316.977-1-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Rolf Neugebauer Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/core.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index dc0b9f8763ad..0afaf00b029b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -86,9 +86,6 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (!have_cpuid_p()) - return *res; - a = 1; c = 0; native_cpuid(&a, &b, &c, &d); @@ -130,8 +127,9 @@ void __init load_ucode_bsp(void) { int vendor; unsigned int family; + bool intel = true; - if (check_loader_disabled_bsp()) + if (!have_cpuid_p()) return; vendor = x86_cpuid_vendor(); @@ -139,16 +137,27 @@ void __init load_ucode_bsp(void) switch (vendor) { case X86_VENDOR_INTEL: - if (family >= 6) - load_ucode_intel_bsp(); + if (family < 6) + return; break; + case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (family < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(family); } static bool check_loader_disabled_ap(void) From 7f3bd8db99746a60bcae1ec4059a4756d19b63c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Feb 2018 12:36:03 +0100 Subject: [PATCH 395/705] Linux 4.9.81 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9550b6939076..4d5753f1c37b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 80 +SUBLEVEL = 81 EXTRAVERSION = NAME = Roaring Lionus From ba4f9c192d3b655af9a24e0b8273e58d9ed8aa63 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 15 Jan 2018 14:30:03 +0100 Subject: [PATCH 396/705] powerpc/pseries: include linux/types.h in asm/hvcall.h commit 1b689a95ce7427075f9ac9fb4aea1af530742b7f upstream. Commit 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") uses u64 in asm/hvcall.h without including linux/types.h This breaks hvcall.h users that do not include the header themselves. Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/hvcall.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 0e12cb2437d1..dc0996b9d75d 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -319,6 +319,7 @@ #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 #ifndef __ASSEMBLY__ +#include /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments From ee6858f72a39226728e5085fc7388785a8ddc5d3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 15 Dec 2017 12:48:32 -0800 Subject: [PATCH 397/705] cifs: Fix missing put_xid in cifs_file_strict_mmap commit f04a703c3d613845ae3141bfaf223489de8ab3eb upstream. If cifs_zap_mapping() returned an error, we would return without putting the xid that we got earlier. Restructure cifs_file_strict_mmap() and cifs_file_mmap() to be more similar to each other and have a single point of return that always puts the xid. Signed-off-by: Matthew Wilcox Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cf192f9ce254..02e403af9518 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3285,20 +3285,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = { int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) { - int rc, xid; + int xid, rc = 0; struct inode *inode = file_inode(file); xid = get_xid(); - if (!CIFS_CACHE_READ(CIFS_I(inode))) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) rc = cifs_zap_mapping(inode); - if (rc) - return rc; - } - - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } @@ -3308,16 +3306,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) int rc, xid; xid = get_xid(); + rc = cifs_revalidate_file(file); - if (rc) { + if (rc) cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", rc); - free_xid(xid); - return rc; - } - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } From f59eda16646ab3b0de9e7cac9528c40df46606ac Mon Sep 17 00:00:00 2001 From: Daniel N Pettersson Date: Thu, 11 Jan 2018 16:00:12 +0100 Subject: [PATCH 398/705] cifs: Fix autonegotiate security settings mismatch commit 9aca7e454415f7878b28524e76bebe1170911a88 upstream. Autonegotiation gives a security settings mismatch error if the SMB server selects an SMBv3 dialect that isn't SMB3.02. The exact error is "protocol revalidation - security settings mismatch". This can be tested using Samba v4.2 or by setting the global Samba setting max protocol = SMB3_00. The check that fails in smb3_validate_negotiate is the dialect verification of the negotiate info response. This is because it tries to verify against the protocol_id in the global smbdefault_values. The protocol_id in smbdefault_values is SMB3.02. In SMB2_negotiate the protocol_id in smbdefault_values isn't updated, it is global so it probably shouldn't be, but server->dialect is. This patch changes the check in smb3_validate_negotiate to use server->dialect instead of server->vals->protocol_id. The patch works with autonegotiate and when using a specific version in the vers mount option. Signed-off-by: Daniel N Pettersson Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 69b610ad3fdc..94c4c1901222 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -585,8 +585,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != - cpu_to_le16(tcon->ses->server->vals->protocol_id)) + if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect)) goto vneg_out; if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) From 7e68916c361ac6d730601bef4e8a4c43aa93495e Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 25 Jan 2018 15:59:39 +0100 Subject: [PATCH 399/705] CIFS: zero sensitive data when freeing commit 97f4b7276b829a8927ac903a119bef2f963ccc58 upstream. also replaces memset()+kfree() by kzfree(). Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsencrypt.c | 3 +-- fs/cifs/connect.c | 6 +++--- fs/cifs/misc.c | 14 ++++---------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5eb04129f938..73360df52ce9 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -318,9 +318,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, { int i; int rc; - char password_with_pad[CIFS_ENCPWD_SIZE]; + char password_with_pad[CIFS_ENCPWD_SIZE] = {0}; - memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); if (password) strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 580b3a4ca53a..441d434a48c1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1667,7 +1667,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. Set the password to NULL */ - kfree(vol->password); + kzfree(vol->password); vol->password = NULL; break; } @@ -1705,7 +1705,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } - kfree(vol->password); + kzfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -4159,7 +4159,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: kfree(vol_info->username); - kfree(vol_info->password); + kzfree(vol_info->password); kfree(vol_info); return tcon; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 5419afea0a36..323d8e34abde 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -99,14 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); - kfree(buf_to_free->auth_key.response); - kfree(buf_to_free); + kzfree(buf_to_free->auth_key.response); + kzfree(buf_to_free); } struct cifs_tcon * @@ -137,10 +134,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) } atomic_dec(&tconInfoAllocCount); kfree(buf_to_free->nativeFileSystem); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free); } From 297c7cc4b5651b174a62925b6c961085f04979fd Mon Sep 17 00:00:00 2001 From: Yang Shunyong Date: Mon, 29 Jan 2018 14:40:11 +0800 Subject: [PATCH 400/705] dmaengine: dmatest: fix container_of member in dmatest_callback commit 66b3bd2356e0a1531c71a3dcf96944621e25c17c upstream. The type of arg passed to dmatest_callback is struct dmatest_done. It refers to test_done in struct dmatest_thread, not done_wait. Fixes: 6f6a23a213be ("dmaengine: dmatest: move callback wait ...") Signed-off-by: Yang Shunyong Acked-by: Adam Wallis Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmatest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index e0bd578a253a..ebe72a466587 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -339,7 +339,7 @@ static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; struct dmatest_thread *thread = - container_of(arg, struct dmatest_thread, done_wait); + container_of(done, struct dmatest_thread, test_done); if (!thread->done) { done->done = true; wake_up_all(done->wait); From 83946c33b9b99b5bc6157cfbf3970265f006c2bf Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 13 Feb 2018 16:45:20 +0100 Subject: [PATCH 401/705] kaiser: fix compile error without vsyscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tobias noticed a compile error on 4.4.115, and it's the same on 4.9.80: arch/x86/mm/kaiser.c: In function ‘kaiser_init’: arch/x86/mm/kaiser.c:348:8: error: ‘vsyscall_pgprot’ undeclared (first use in this function) It seems like his combination of kernel options doesn't work for KAISER. X86_VSYSCALL_EMULATION is not set on his system, while LEGACY_VSYSCALL is set to NONE (LEGACY_VSYSCALL_NONE=y). He managed to get things compiling again, by moving the 'extern unsigned long vsyscall_pgprot' outside of the preprocessor statement. This works because the optimizer removes that code (vsyscall_enabled() is always false) - and that's how it was done in some older backports. Reported-by: Tobias Jakobi Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vsyscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 9ee85066f407..62210da19a92 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,7 +13,6 @@ extern void map_vsyscall(void); */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); extern bool vsyscall_enabled(void); -extern unsigned long vsyscall_pgprot; #else static inline void map_vsyscall(void) {} static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) @@ -22,5 +21,6 @@ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) } static inline bool vsyscall_enabled(void) { return false; } #endif +extern unsigned long vsyscall_pgprot; #endif /* _ASM_X86_VSYSCALL_H */ From 0b376535ad5493d2fcf70ab5f6539551aadb493e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Dec 2017 10:32:03 +0100 Subject: [PATCH 402/705] posix-timer: Properly check sigevent->sigev_notify commit cef31d9af908243421258f1df35a4a644604efbe upstream. timer_create() specifies via sigevent->sigev_notify the signal delivery for the new timer. The valid modes are SIGEV_NONE, SIGEV_SIGNAL, SIGEV_THREAD and (SIGEV_SIGNAL | SIGEV_THREAD_ID). The sanity check in good_sigevent() is only checking the valid combination for the SIGEV_THREAD_ID bit, i.e. SIGEV_SIGNAL, but if SIGEV_THREAD_ID is not set it accepts any random value. This has no real effects on the posix timer and signal delivery code, but it affects show_timer() which handles the output of /proc/$PID/timers. That function uses a string array to pretty print sigev_notify. The access to that array has no bound checks, so random sigev_notify cause access beyond the array bounds. Add proper checks for the valid notify modes and remove the SIGEV_THREAD_ID masking from various code pathes as SIGEV_NONE can never be set in combination with SIGEV_THREAD_ID. Reported-by: Eric Biggers Reported-by: Dmitry Vyukov Reported-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Cc: John Stultz Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-timers.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index f2826c35e918..fc7c37ad90a0 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -507,17 +507,22 @@ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; - if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || - !same_thread_group(rtn, current) || - (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + switch (event->sigev_notify) { + case SIGEV_SIGNAL | SIGEV_THREAD_ID: + rtn = find_task_by_vpid(event->sigev_notify_thread_id); + if (!rtn || !same_thread_group(rtn, current)) + return NULL; + /* FALLTHRU */ + case SIGEV_SIGNAL: + case SIGEV_THREAD: + if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) + return NULL; + /* FALLTHRU */ + case SIGEV_NONE: + return task_pid(rtn); + default: return NULL; - - if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) - return NULL; - - return task_pid(rtn); + } } void posix_timers_register_clock(const clockid_t clock_id, @@ -745,8 +750,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) /* interval timer ? */ if (iv.tv64) cur_setting->it_interval = ktime_to_timespec(iv); - else if (!hrtimer_active(timer) && - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE) return; now = timer->base->get_time(); @@ -757,7 +761,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * expiry is > now. */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + timr->it_sigev_notify == SIGEV_NONE)) timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); @@ -767,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * A single shot SIGEV_NONE timer must return 0, when * it is expired ! */ - if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + if (timr->it_sigev_notify != SIGEV_NONE) cur_setting->it_value.tv_nsec = 1; } else cur_setting->it_value = ktime_to_timespec(remaining); @@ -865,7 +869,7 @@ common_timer_set(struct k_itimer *timr, int flags, timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); /* SIGEV_NONE timers are not queued ! See common_timer_get */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { + if (timr->it_sigev_notify == SIGEV_NONE) { /* Setup correct expiry time for relative timers */ if (mode == HRTIMER_MODE_REL) { hrtimer_add_expires(timer, timer->base->get_time()); From 57ddb8eae517315f811c4915f1d5c11e59423707 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Tue, 7 Mar 2017 00:57:20 +0100 Subject: [PATCH 403/705] usb: gadget: uvc: Missing files for configfs interface commit c8cd751060b149997b9de53a494fb1490ded72c5 upstream. Commit 76e0da34c7ce ("usb-gadget/uvc: use per-attribute show and store methods") caused a stringification of an undefined macro argument "aname", so three UVC parameters (streaming_interval, streaming_maxpacket and streaming_maxburst) were named "aname". Add the definition of "aname" to the main macro and name the filenames as originaly intended. Signed-off-by: Petr Cvek Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_configfs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 31125a4a2658..d7dcd39fe12c 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2140,7 +2140,7 @@ static struct configfs_item_operations uvc_item_ops = { .release = uvc_attr_release, }; -#define UVCG_OPTS_ATTR(cname, conv, str2u, uxx, vnoc, limit) \ +#define UVCG_OPTS_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit) \ static ssize_t f_uvc_opts_##cname##_show( \ struct config_item *item, char *page) \ { \ @@ -2183,16 +2183,16 @@ end: \ return ret; \ } \ \ -UVC_ATTR(f_uvc_opts_, cname, aname) +UVC_ATTR(f_uvc_opts_, cname, cname) #define identity_conv(x) (x) -UVCG_OPTS_ATTR(streaming_interval, identity_conv, kstrtou8, u8, identity_conv, - 16); -UVCG_OPTS_ATTR(streaming_maxpacket, le16_to_cpu, kstrtou16, u16, le16_to_cpu, - 3072); -UVCG_OPTS_ATTR(streaming_maxburst, identity_conv, kstrtou8, u8, identity_conv, - 15); +UVCG_OPTS_ATTR(streaming_interval, streaming_interval, identity_conv, + kstrtou8, u8, identity_conv, 16); +UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, le16_to_cpu, + kstrtou16, u16, le16_to_cpu, 3072); +UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, identity_conv, + kstrtou8, u8, identity_conv, 15); #undef identity_conv From 1c679981309b4d36b024fc954cfcf2111a007de0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 20:45:37 -0500 Subject: [PATCH 404/705] sched/rt: Use container_of() to get root domain in rto_push_irq_work_func() commit ad0f1d9d65938aec72a698116cd73a980916895e upstream. When the rto_push_irq_work_func() is called, it looks at the RT overloaded bitmask in the root domain via the runqueue (rq->rd). The problem is that during CPU up and down, nothing here stops rq->rd from changing between taking the rq->rd->rto_lock and releasing it. That means the lock that is released is not the same lock that was taken. Instead of using this_rq()->rd to get the root domain, as the irq work is part of the root domain, we can simply get the root domain from the irq work that is passed to the routine: container_of(work, struct root_domain, rto_push_work) This keeps the root domain consistent. Reported-by: Pavan Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7a360d6f6798..27b6209a2928 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1895,9 +1895,8 @@ static void push_rt_tasks(struct rq *rq) * the rt_loop_next will cause the iterator to perform another scan. * */ -static int rto_next_cpu(struct rq *rq) +static int rto_next_cpu(struct root_domain *rd) { - struct root_domain *rd = rq->rd; int next; int cpu; @@ -1973,7 +1972,7 @@ static void tell_cpu_to_push(struct rq *rq) * Otherwise it is finishing up and an ipi needs to be sent. */ if (rq->rd->rto_cpu < 0) - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rq->rd); raw_spin_unlock(&rq->rd->rto_lock); @@ -1986,6 +1985,8 @@ static void tell_cpu_to_push(struct rq *rq) /* Called from hardirq context */ void rto_push_irq_work_func(struct irq_work *work) { + struct root_domain *rd = + container_of(work, struct root_domain, rto_push_work); struct rq *rq; int cpu; @@ -2001,18 +2002,18 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rq->lock); } - raw_spin_lock(&rq->rd->rto_lock); + raw_spin_lock(&rd->rto_lock); /* Pass the IPI to the next rt overloaded queue */ - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rd); - raw_spin_unlock(&rq->rd->rto_lock); + raw_spin_unlock(&rd->rto_lock); if (cpu < 0) return; /* Try the next RT overloaded CPU */ - irq_work_queue_on(&rq->rd->rto_push_work, cpu); + irq_work_queue_on(&rd->rto_push_work, cpu); } #endif /* HAVE_RT_PUSH_IPI */ From a384e5437f705972d2884cea17b931c1a2cd3277 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 20:45:38 -0500 Subject: [PATCH 405/705] sched/rt: Up the root domain ref count when passing it around via IPIs commit 364f56653708ba8bcdefd4f0da2a42904baa8eeb upstream. When issuing an IPI RT push, where an IPI is sent to each CPU that has more than one RT task scheduled on it, it references the root domain's rto_mask, that contains all the CPUs within the root domain that has more than one RT task in the runable state. The problem is, after the IPIs are initiated, the rq->lock is released. This means that the root domain that is associated to the run queue could be freed while the IPIs are going around. Add a sched_get_rd() and a sched_put_rd() that will increment and decrement the root domain's ref count respectively. This way when initiating the IPIs, the scheduler will up the root domain's ref count before releasing the rq->lock, ensuring that the root domain does not go away until the IPI round is complete. Reported-by: Pavan Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 13 +++++++++++++ kernel/sched/rt.c | 9 +++++++-- kernel/sched/sched.h | 2 ++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e5066955cc3a..bce3a7ad4253 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5864,6 +5864,19 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) call_rcu_sched(&old_rd->rcu, free_rootdomain); } +void sched_get_rd(struct root_domain *rd) +{ + atomic_inc(&rd->refcount); +} + +void sched_put_rd(struct root_domain *rd) +{ + if (!atomic_dec_and_test(&rd->refcount)) + return; + + call_rcu_sched(&rd->rcu, free_rootdomain); +} + static int init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 27b6209a2928..f6d68ddfa2f3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1978,8 +1978,11 @@ static void tell_cpu_to_push(struct rq *rq) rto_start_unlock(&rq->rd->rto_loop_start); - if (cpu >= 0) + if (cpu >= 0) { + /* Make sure the rd does not get freed while pushing */ + sched_get_rd(rq->rd); irq_work_queue_on(&rq->rd->rto_push_work, cpu); + } } /* Called from hardirq context */ @@ -2009,8 +2012,10 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rd->rto_lock); - if (cpu < 0) + if (cpu < 0) { + sched_put_rd(rd); return; + } /* Try the next RT overloaded CPU */ irq_work_queue_on(&rd->rto_push_work, cpu); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index cff985feb6e7..f564a1d2c9d5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -590,6 +590,8 @@ struct root_domain { }; extern struct root_domain def_root_domain; +extern void sched_get_rd(struct root_domain *rd); +extern void sched_put_rd(struct root_domain *rd); #ifdef HAVE_RT_PUSH_IPI extern void rto_push_irq_work_func(struct irq_work *work); From 7e2fb808d3c7c52f88ebc670949dbf1bae48f2a2 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 5 Dec 2017 20:58:35 +0000 Subject: [PATCH 406/705] dccp: CVE-2017-8824: use-after-free in DCCP code commit 69c64866ce072dea1d1e59a0d61e0f66c0dffb76 upstream. Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect() must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL. Signed-off-by: Mohamed Ghannam Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/proto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b68168fcc06a..9d43c1f40274 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; @@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; + dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); From 1ff1353a03c6cde5334a94cb67f8632141b0589b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 26 Sep 2017 17:10:20 -0400 Subject: [PATCH 407/705] media: dvb-usb-v2: lmedm04: Improve logic checking of warm start commit 3d932ee27e852e4904647f15b64dedca51187ad7 upstream. Warm start has no check as whether a genuine device has connected and proceeds to next execution path. Check device should read 0x47 at offset of 2 on USB descriptor read and it is the amount requested of 6 bytes. Fix for kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access as Reported-by: Andrey Konovalov Signed-off-by: Malcolm Priestley Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 0e8fb89896c4..50d02f879949 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -504,18 +504,23 @@ static int lme2510_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, static int lme2510_return_status(struct dvb_usb_device *d) { - int ret = 0; + int ret; u8 *data; - data = kzalloc(10, GFP_KERNEL); + data = kzalloc(6, GFP_KERNEL); if (!data) return -ENOMEM; - ret |= usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), - 0x06, 0x80, 0x0302, 0x00, data, 0x0006, 200); - info("Firmware Status: %x (%x)", ret , data[2]); + ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), + 0x06, 0x80, 0x0302, 0x00, + data, 0x6, 200); + if (ret != 6) + ret = -EINVAL; + else + ret = data[2]; + + info("Firmware Status: %6ph", data); - ret = (ret < 0) ? -ENODEV : data[2]; kfree(data); return ret; } @@ -1200,6 +1205,7 @@ static int lme2510_get_adapter_count(struct dvb_usb_device *d) static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) { struct lme2510_state *st = d->priv; + int status; usb_reset_configuration(d->udev); @@ -1208,12 +1214,16 @@ static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) st->dvb_usb_lme2510_firmware = dvb_usb_lme2510_firmware; - if (lme2510_return_status(d) == 0x44) { + status = lme2510_return_status(d); + if (status == 0x44) { *name = lme_firmware_switch(d, 0); return COLD; } - return 0; + if (status != 0x47) + return -EINVAL; + + return WARM; } static int lme2510_get_stream_config(struct dvb_frontend *fe, u8 *ts_type, From f320dd20224ca9193040c379f0243030dd67fa4b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 26 Sep 2017 17:10:21 -0400 Subject: [PATCH 408/705] media: dvb-usb-v2: lmedm04: move ts2020 attach to dm04_lme2510_tuner commit 7bf7a7116ed313c601307f7e585419369926ab05 upstream. When the tuner was split from m88rs2000 the attach function is in wrong place. Move to dm04_lme2510_tuner to trap errors on failure and removing a call to lme_coldreset. Prevents driver starting up without any tuner connected. Fixes to trap for ts2020 fail. LME2510(C): FE Found M88RS2000 ts2020: probe of 0-0060 failed with error -11 ... LME2510(C): TUN Found RS2000 tuner kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Reported-by: Andrey Konovalov Signed-off-by: Malcolm Priestley Tested-by: Andrey Konovalov Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 50d02f879949..5c4aa247d650 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -1084,8 +1084,6 @@ static int dm04_lme2510_frontend_attach(struct dvb_usb_adapter *adap) if (adap->fe[0]) { info("FE Found M88RS2000"); - dvb_attach(ts2020_attach, adap->fe[0], &ts2020_config, - &d->i2c_adap); st->i2c_tuner_gate_w = 5; st->i2c_tuner_gate_r = 5; st->i2c_tuner_addr = 0x60; @@ -1151,17 +1149,18 @@ static int dm04_lme2510_tuner(struct dvb_usb_adapter *adap) ret = st->tuner_config; break; case TUNER_RS2000: - ret = st->tuner_config; + if (dvb_attach(ts2020_attach, adap->fe[0], + &ts2020_config, &d->i2c_adap)) + ret = st->tuner_config; break; default: break; } - if (ret) + if (ret) { info("TUN Found %s tuner", tun_msg[ret]); - else { - info("TUN No tuner found --- resetting device"); - lme_coldreset(d); + } else { + info("TUN No tuner found"); return -ENODEV; } From 198a7ddaf5d2c76130b28f19ed6d768860ea2b8e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 22 Sep 2017 09:07:06 -0400 Subject: [PATCH 409/705] media: hdpvr: Fix an error handling path in hdpvr_probe() commit c0f71bbb810237a38734607ca4599632f7f5d47f upstream. Here, hdpvr_register_videodev() is responsible for setup and register a video device. Also defining and initializing a worker. hdpvr_register_videodev() is calling by hdpvr_probe at last. So no need to flush any work here. Unregister v4l2, free buffers and memory. If hdpvr_probe() will fail. Signed-off-by: Arvind Yadav Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/hdpvr/hdpvr-core.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index a61d8fd63c12..a20b60ac66ca 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -295,7 +295,7 @@ static int hdpvr_probe(struct usb_interface *interface, /* register v4l2_device early so it can be used for printks */ if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) { dev_err(&interface->dev, "v4l2_device_register failed\n"); - goto error; + goto error_free_dev; } mutex_init(&dev->io_mutex); @@ -304,7 +304,7 @@ static int hdpvr_probe(struct usb_interface *interface, dev->usbc_buf = kmalloc(64, GFP_KERNEL); if (!dev->usbc_buf) { v4l2_err(&dev->v4l2_dev, "Out of memory\n"); - goto error; + goto error_v4l2_unregister; } init_waitqueue_head(&dev->wait_buffer); @@ -342,13 +342,13 @@ static int hdpvr_probe(struct usb_interface *interface, } if (!dev->bulk_in_endpointAddr) { v4l2_err(&dev->v4l2_dev, "Could not find bulk-in endpoint\n"); - goto error; + goto error_put_usb; } /* init the device */ if (hdpvr_device_init(dev)) { v4l2_err(&dev->v4l2_dev, "device init failed\n"); - goto error; + goto error_put_usb; } mutex_lock(&dev->io_mutex); @@ -356,7 +356,7 @@ static int hdpvr_probe(struct usb_interface *interface, mutex_unlock(&dev->io_mutex); v4l2_err(&dev->v4l2_dev, "allocating transfer buffers failed\n"); - goto error; + goto error_put_usb; } mutex_unlock(&dev->io_mutex); @@ -364,7 +364,7 @@ static int hdpvr_probe(struct usb_interface *interface, retval = hdpvr_register_i2c_adapter(dev); if (retval < 0) { v4l2_err(&dev->v4l2_dev, "i2c adapter register failed\n"); - goto error; + goto error_free_buffers; } client = hdpvr_register_ir_rx_i2c(dev); @@ -397,13 +397,17 @@ static int hdpvr_probe(struct usb_interface *interface, reg_fail: #if IS_ENABLED(CONFIG_I2C) i2c_del_adapter(&dev->i2c_adapter); +error_free_buffers: #endif + hdpvr_free_buffers(dev); +error_put_usb: + usb_put_dev(dev->udev); + kfree(dev->usbc_buf); +error_v4l2_unregister: + v4l2_device_unregister(&dev->v4l2_dev); +error_free_dev: + kfree(dev); error: - if (dev) { - flush_work(&dev->worker); - /* this frees allocated memory */ - hdpvr_delete(dev); - } return retval; } From cbdabc7027b1fea4d6a7d86bc573096ef5126657 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:54:10 +0200 Subject: [PATCH 410/705] mtd: cfi: convert inline functions to macros commit 9e343e87d2c4c707ef8fae2844864d4dde3a2d13 upstream. The map_word_() functions, dating back to linux-2.6.8, try to perform bitwise operations on a 'map_word' structure. This may have worked with compilers that were current then (gcc-3.4 or earlier), but end up being rather inefficient on any version I could try now (gcc-4.4 or higher). Specifically we hit a problem analyzed in gcc PR81715 where we fail to reuse the stack space for local variables. This can be seen immediately in the stack consumption for cfi_staa_erase_varsize() and other functions that (with CONFIG_KASAN) can be up to 2200 bytes. Changing the inline functions into macros brings this down to 1280 bytes. Without KASAN, the same problem exists, but the stack consumption is lower to start with, my patch shrinks it from 920 to 496 bytes on with arm-linux-gnueabi-gcc-5.4, and saves around 1KB in .text size for cfi_cmdset_0020.c, as it avoids copying map_word structures for each call to one of these helpers. With the latest gcc-8 snapshot, the problem is fixed in upstream gcc, but nobody uses that yet, so we should still work around it in mainline kernels and probably backport the workaround to stable kernels as well. We had a couple of other functions that suffered from the same gcc bug, and all of those had a simpler workaround involving dummy variables in the inline function. Unfortunately that did not work here, the macro hack was the best I could come up with. It would also be helpful to have someone to a little performance testing on the patch, to see how much it helps in terms of CPU utilitzation. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Acked-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/map.h | 120 +++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 64 deletions(-) diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3aa56e3104bb..b5b43f94f311 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -270,75 +270,67 @@ void map_destroy(struct mtd_info *mtd); #define INVALIDATE_CACHED_RANGE(map, from, size) \ do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0) +#define map_word_equal(map, val1, val2) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) \ + if ((val1).x[i] != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + ret; \ +}) -static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) -{ - int i; +#define map_word_and(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & (val2).x[i]; \ + r; \ +}) - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] != val2.x[i]) - return 0; - } +#define map_word_clr(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & ~(val2).x[i]; \ + r; \ +}) - return 1; -} +#define map_word_or(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] | (val2).x[i]; \ + r; \ +}) -static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; +#define map_word_andequal(map, val1, val2, val3) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) { \ + if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + } \ + ret; \ +}) - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & val2.x[i]; - - return r; -} - -static inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & ~val2.x[i]; - - return r; -} - -static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] | val2.x[i]; - - return r; -} - -static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if ((val1.x[i] & val2.x[i]) != val3.x[i]) - return 0; - } - - return 1; -} - -static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] & val2.x[i]) - return 1; - } - - return 0; -} +#define map_word_bitsset(map, val1, val2) \ +({ \ + int i, ret = 0; \ + for (i = 0; i < map_words(map); i++) { \ + if ((val1).x[i] & (val2).x[i]) { \ + ret = 1; \ + break; \ + } \ + } \ + ret; \ +}) static inline map_word map_word_load(struct map_info *map, const void *ptr) { From d25d52ff1011faaee8174bac39b0c58a59833f22 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 8 Jan 2018 15:36:48 -0500 Subject: [PATCH 411/705] mtd: nand: brcmnand: Disable prefetch by default commit f953f0f89663c39f08f4baaa8a4a881401b65654 upstream. Brcm nand controller prefetch feature needs to be disabled by default. Enabling affects performance on random reads as well as dma reads. Signed-off-by: Kamal Dasu Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Acked-by: Florian Fainelli Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/brcmnand/brcmnand.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index d9fab2222eb3..1a4a790054e4 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -2193,16 +2193,9 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) if (ctrl->nand_version >= 0x0702) tmp |= ACC_CONTROL_RD_ERASED; tmp &= ~ACC_CONTROL_FAST_PGM_RDIN; - if (ctrl->features & BRCMNAND_HAS_PREFETCH) { - /* - * FIXME: Flash DMA + prefetch may see spurious erased-page ECC - * errors - */ - if (has_flash_dma(ctrl)) - tmp &= ~ACC_CONTROL_PREFETCH; - else - tmp |= ACC_CONTROL_PREFETCH; - } + if (ctrl->features & BRCMNAND_HAS_PREFETCH) + tmp &= ~ACC_CONTROL_PREFETCH; + nand_writereg(ctrl, offs, tmp); return 0; From d80cd3e93653e4868d99aed4eb18ff2fcd134574 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 12 Jan 2018 10:13:36 +0100 Subject: [PATCH 412/705] mtd: nand: Fix nand_do_read_oob() return value commit 87e89ce8d0d14f573c068c61bec2117751fb5103 upstream. Starting from commit 041e4575f034 ("mtd: nand: handle ECC errors in OOB"), nand_do_read_oob() (from the NAND core) did return 0 or a negative error, and the MTD layer expected it. However, the trend for the NAND layer is now to return an error or a positive number of bitflips. Deciding which status to return to the user belongs to the MTD layer. Commit e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") brought this logic to the mtd_read_oob() function while the return value coming from nand_do_read_oob() (called by the ->_read_oob() hook) was left unchanged. Fixes: e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a77cfd74a92e..21c03086bb7f 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2320,6 +2320,7 @@ EXPORT_SYMBOL(nand_write_oob_syndrome); static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + unsigned int max_bitflips = 0; int page, realpage, chipnr; struct nand_chip *chip = mtd_to_nand(mtd); struct mtd_ecc_stats stats; @@ -2377,6 +2378,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, nand_wait_ready(mtd); } + max_bitflips = max_t(unsigned int, max_bitflips, ret); + readlen -= len; if (!readlen) break; @@ -2402,7 +2405,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, if (mtd->ecc_stats.failed - stats.failed) return -EBADMSG; - return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; + return max_bitflips; } /** From 44ebd641be56653803482756aca487b5152a22d5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 24 Jan 2018 23:49:31 +0100 Subject: [PATCH 413/705] mtd: nand: sunxi: Fix ECC strength choice commit f4c6cd1a7f2275d5bc0e494b21fff26f8dde80f0 upstream. When the requested ECC strength does not exactly match the strengths supported by the ECC engine, the driver is selecting the closest strength meeting the 'selected_strength > requested_strength' constraint. Fix the fact that, in this particular case, ecc->strength value was not updated to match the 'selected_strength'. For instance, one can encounter this issue when no ECC requirement is filled in the device tree while the NAND chip minimum requirement is not a strength/step_size combo natively supported by the ECC engine. Fixes: 1fef62c1423b ("mtd: nand: add sunxi NAND flash controller support") Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/sunxi_nand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index f9b2a771096b..e26c4f880df6 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1835,8 +1835,14 @@ static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, /* Add ECC info retrieval from DT */ for (i = 0; i < ARRAY_SIZE(strengths); i++) { - if (ecc->strength <= strengths[i]) + if (ecc->strength <= strengths[i]) { + /* + * Update ecc->strength value with the actual strength + * that will be used by the ECC engine. + */ + ecc->strength = strengths[i]; break; + } } if (i >= ARRAY_SIZE(strengths)) { From 84f9d8536c8bf440d84093eb749290d88f7e1d76 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 16:01:20 +0100 Subject: [PATCH 414/705] ubi: fastmap: Erase outdated anchor PEBs during attach commit f78e5623f45bab2b726eec29dc5cefbbab2d0b1c upstream. The fastmap update code might erase the current fastmap anchor PEB in case it doesn't find any new free PEB. When a power cut happens in this situation we must not have any outdated fastmap anchor PEB on the device, because that would be used to attach during next boot. The easiest way to make that sure is to erase all outdated fastmap anchor PEBs synchronously during attach. Signed-off-by: Sascha Hauer Reviewed-by: Richard Weinberger Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 77 ++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b5b8cd6f481c..668b46202507 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1528,6 +1528,46 @@ static void shutdown_work(struct ubi_device *ubi) } } +/** + * erase_aeb - erase a PEB given in UBI attach info PEB + * @ubi: UBI device description object + * @aeb: UBI attach info PEB + * @sync: If true, erase synchronously. Otherwise schedule for erasure + */ +static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) +{ + struct ubi_wl_entry *e; + int err; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (sync) { + err = sync_erase(ubi, e, false); + if (err) + goto out_free; + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } else { + err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); + if (err) + goto out_free; + } + + return 0; + +out_free: + wl_entry_destroy(ubi, e); + + return err; +} + /** * ubi_wl_init - initialize the WL sub-system using attaching information. * @ubi: UBI device description object @@ -1566,18 +1606,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + err = erase_aeb(ubi, aeb, false); + if (err) goto out_free; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); - goto out_free; - } - found_pebs++; } @@ -1635,6 +1667,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { + bool sync = false; + /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face @@ -1644,18 +1678,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->lookuptbl[aeb->pnum]) continue; - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + /* + * The fastmap update code might not find a free PEB for + * writing the fastmap anchor to and then reuses the + * current fastmap anchor PEB. When this PEB gets erased + * and a power cut happens before it is written again we + * must make sure that the fastmap attach code doesn't + * find any outdated fastmap anchors, hence we erase the + * outdated fastmap anchor PEBs synchronously here. + */ + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) + sync = true; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi_assert(!ubi->lookuptbl[e->pnum]); - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); + err = erase_aeb(ubi, aeb, sync); + if (err) goto out_free; - } } found_pebs++; From de14d0c124ca496381872a42cd32a53433ed28b2 Mon Sep 17 00:00:00 2001 From: Bradley Bolen Date: Thu, 18 Jan 2018 08:55:20 -0500 Subject: [PATCH 415/705] ubi: block: Fix locking for idr_alloc/idr_remove commit 7f29ae9f977bcdc3654e68bc36d170223c52fd48 upstream. This fixes a race with idr_alloc where gd->first_minor can be set to the same value for two simultaneous calls to ubiblock_create. Each instance calls device_add_disk with the same first_minor. device_add_disk calls bdi_register_owner which generates several warnings. WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/devices/virtual/bdi/252:2' WARNING: CPU: 1 PID: 179 at kernel-source/lib/kobject.c:240 kobject_add_internal+0x1ec/0x2f8 kobject_add_internal failed for 252:2 with -EEXIST, don't try to register things with the same name in the same directory WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/dev/block/252:2' However, device_add_disk does not error out when bdi_register_owner returns an error. Control continues until reaching blk_register_queue. It then BUGs. kernel BUG at kernel-source/fs/sysfs/group.c:113! [] (internal_create_group) from [] (sysfs_create_group+0x20/0x24) [] (sysfs_create_group) from [] (blk_trace_init_sysfs+0x18/0x20) [] (blk_trace_init_sysfs) from [] (blk_register_queue+0xd8/0x154) [] (blk_register_queue) from [] (device_add_disk+0x194/0x44c) [] (device_add_disk) from [] (ubiblock_create+0x284/0x2e0) [] (ubiblock_create) from [] (vol_cdev_ioctl+0x450/0x554) [] (vol_cdev_ioctl) from [] (vfs_ioctl+0x30/0x44) [] (vfs_ioctl) from [] (do_vfs_ioctl+0xa0/0x790) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x44/0x68) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) Locking idr_alloc/idr_remove removes the race and keeps gd->first_minor unique. Fixes: 2bf50d42f3a4 ("UBI: block: Dynamically allocate minor numbers") Signed-off-by: Bradley Bolen Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/block.c | 42 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index d1e6931c132f..46913ef25bc0 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -99,6 +99,8 @@ struct ubiblock { /* Linked list of all ubiblock instances */ static LIST_HEAD(ubiblock_devices); +static DEFINE_IDR(ubiblock_minor_idr); +/* Protects ubiblock_devices and ubiblock_minor_idr */ static DEFINE_MUTEX(devices_mutex); static int ubiblock_major; @@ -353,8 +355,6 @@ static struct blk_mq_ops ubiblock_mq_ops = { .init_request = ubiblock_init_request, }; -static DEFINE_IDR(ubiblock_minor_idr); - int ubiblock_create(struct ubi_volume_info *vi) { struct ubiblock *dev; @@ -367,14 +367,15 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Check that the volume isn't already handled */ mutex_lock(&devices_mutex); if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { - mutex_unlock(&devices_mutex); - return -EEXIST; + ret = -EEXIST; + goto out_unlock; } - mutex_unlock(&devices_mutex); dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL); - if (!dev) - return -ENOMEM; + if (!dev) { + ret = -ENOMEM; + goto out_unlock; + } mutex_init(&dev->dev_mutex); @@ -439,14 +440,13 @@ int ubiblock_create(struct ubi_volume_info *vi) goto out_free_queue; } - mutex_lock(&devices_mutex); list_add_tail(&dev->list, &ubiblock_devices); - mutex_unlock(&devices_mutex); /* Must be the last step: anyone can call file ops from now on */ add_disk(dev->gd); dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", dev->ubi_num, dev->vol_id, vi->name); + mutex_unlock(&devices_mutex); return 0; out_free_queue: @@ -459,6 +459,8 @@ out_put_disk: put_disk(dev->gd); out_free_dev: kfree(dev); +out_unlock: + mutex_unlock(&devices_mutex); return ret; } @@ -480,30 +482,36 @@ static void ubiblock_cleanup(struct ubiblock *dev) int ubiblock_remove(struct ubi_volume_info *vi) { struct ubiblock *dev; + int ret; mutex_lock(&devices_mutex); dev = find_dev_nolock(vi->ubi_num, vi->vol_id); if (!dev) { - mutex_unlock(&devices_mutex); - return -ENODEV; + ret = -ENODEV; + goto out_unlock; } /* Found a device, let's lock it so we can check if it's busy */ mutex_lock(&dev->dev_mutex); if (dev->refcnt > 0) { - mutex_unlock(&dev->dev_mutex); - mutex_unlock(&devices_mutex); - return -EBUSY; + ret = -EBUSY; + goto out_unlock_dev; } /* Remove from device list */ list_del(&dev->list); - mutex_unlock(&devices_mutex); - ubiblock_cleanup(dev); mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + kfree(dev); return 0; + +out_unlock_dev: + mutex_unlock(&dev->dev_mutex); +out_unlock: + mutex_unlock(&devices_mutex); + return ret; } static int ubiblock_resize(struct ubi_volume_info *vi) @@ -632,6 +640,7 @@ static void ubiblock_remove_all(void) struct ubiblock *next; struct ubiblock *dev; + mutex_lock(&devices_mutex); list_for_each_entry_safe(dev, next, &ubiblock_devices, list) { /* The module is being forcefully removed */ WARN_ON(dev->desc); @@ -640,6 +649,7 @@ static void ubiblock_remove_all(void) ubiblock_cleanup(dev); kfree(dev); } + mutex_unlock(&devices_mutex); } int __init ubiblock_init(void) From 298dc6c6696b3d2abcdf18614d37278acb64abd3 Mon Sep 17 00:00:00 2001 From: Xiaolei Li Date: Fri, 23 Jun 2017 10:37:23 +0800 Subject: [PATCH 416/705] ubifs: Massage assert in ubifs_xattr_set() wrt. init_xattrs commit d8db5b1ca9d4c57e49893d0f78e6d5ce81450cc8 upstream. The inode is not locked in init_xattrs when creating a new inode. Without this patch, there will occurs assert when booting or creating a new file, if the kernel config CONFIG_SECURITY_SMACK is enabled. Log likes: UBIFS assert failed in ubifs_xattr_set at 298 (pid 1156) CPU: 1 PID: 1156 Comm: ldconfig Tainted: G S 4.12.0-rc1-207440-g1e70b02 #2 Hardware name: MediaTek MT2712 evaluation board (DT) Call trace: [] dump_backtrace+0x0/0x238 [] show_stack+0x14/0x20 [] dump_stack+0x9c/0xc0 [] ubifs_xattr_set+0x374/0x5e0 [] init_xattrs+0x5c/0xb8 [] security_inode_init_security+0x110/0x190 [] ubifs_init_security+0x30/0x68 [] ubifs_mkdir+0x100/0x200 [] vfs_mkdir+0x11c/0x1b8 [] SyS_mkdirat+0x74/0xd0 [] __sys_trace_return+0x0/0x4 Signed-off-by: Xiaolei Li Signed-off-by: Richard Weinberger Cc: stable@vger.kernel.org (julia: massaged to apply to 4.9.y, which doesn't contain fscrypto support) Signed-off-by: Julia Cartwright Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/xattr.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index d9f9615bfd71..3979d767a0cb 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -270,7 +270,8 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) } static int __ubifs_setxattr(struct inode *host, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags, + bool check_lock) { struct inode *inode; struct ubifs_info *c = host->i_sb->s_fs_info; @@ -279,7 +280,8 @@ static int __ubifs_setxattr(struct inode *host, const char *name, union ubifs_key key; int err; - ubifs_assert(inode_is_locked(host)); + if (check_lock) + ubifs_assert(inode_is_locked(host)); if (size > UBIFS_MAX_INO_DATA) return -ERANGE; @@ -548,7 +550,8 @@ static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, } strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); - err = __ubifs_setxattr(inode, name, xattr->value, xattr->value_len, 0); + err = __ubifs_setxattr(inode, name, xattr->value, + xattr->value_len, 0, false); kfree(name); if (err < 0) break; @@ -594,7 +597,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler, name = xattr_full_name(handler, name); if (value) - return __ubifs_setxattr(inode, name, value, size, flags); + return __ubifs_setxattr(inode, name, value, size, flags, + true); else return __ubifs_removexattr(inode, name); } From e1df8c682df646223dedc5352263f4a5286f9a86 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 15 Dec 2017 16:12:32 -0500 Subject: [PATCH 417/705] nfs/pnfs: fix nfs_direct_req ref leak when i/o falls back to the mds commit ba4a76f703ab7eb72941fdaac848502073d6e9ee upstream. Currently when falling back to doing I/O through the MDS (via pnfs_{read|write}_through_mds), the client frees the nfs_pgio_header without releasing the reference taken on the dreq via pnfs_generic_pg_{read|write}pages -> nfs_pgheader_init -> nfs_direct_pgio_init. It then takes another reference on the dreq via nfs_generic_pg_pgios -> nfs_pgheader_init -> nfs_direct_pgio_init and as a result the requester will become stuck in inode_dio_wait. Once that happens, other processes accessing the inode will become stuck as well. Ensure that pnfs_read_through_mds() and pnfs_write_through_mds() clean up correctly by calling hdr->completion_ops->completion() instead of calling hdr->release() directly. This can be reproduced (sometimes) by performing "storage failover takeover" commands on NetApp filer while doing direct I/O from a client. This can also be reproduced using SystemTap to simulate a failure while doing direct I/O from a client (from Dave Wysochanski ): stap -v -g -e 'probe module("nfs_layout_nfsv41_files").function("nfs4_fl_prepare_ds").return { $return=NULL; exit(); }' Suggested-by: Trond Myklebust Signed-off-by: Scott Mayhew Fixes: 1ca018d28d ("pNFS: Fix a memory leak when attempted pnfs fails") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b7a07ba8783a..b8e44746f761 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2145,7 +2145,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } static enum pnfs_try_status @@ -2256,7 +2256,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } /* From d1840343f9483b240b4ceb7bb21a909b7ba3eb9c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Dec 2017 14:39:13 -0500 Subject: [PATCH 418/705] NFS: Add a cond_resched() to nfs_commit_release_pages() commit 7f1bda447c9bd48b415acedba6b830f61591601f upstream. The commit list can get very large, and so we need a cond_resched() in nfs_commit_release_pages() in order to ensure we don't hog the CPU for excessive periods of time. Reported-by: Mike Galbraith Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9905735463a4..9a3b3820306d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1806,6 +1806,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); + /* Latency breaker */ + cond_resched(); } nfss = NFS_SERVER(data->inode); if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) From ca2c316f7cb490c350b101d59c1b892970ac33d0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 16 Jan 2018 10:08:00 -0500 Subject: [PATCH 419/705] NFS: commit direct writes even if they fail partially commit 1b8d97b0a837beaf48a8449955b52c650a7114b4 upstream. If some of the WRITE calls making up an O_DIRECT write syscall fail, we neglect to commit, even if some of the WRITEs succeed. We also depend on the commit code to free the reference count on the nfs_page taken in the "if (request_commit)" case at the end of nfs_direct_write_completion(). The problem was originally noticed because ENOSPC's encountered partway through a write would result in a closed file being sillyrenamed when it should have been unlinked. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/direct.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bd81bcf3ffcf..1ac1593aded3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -787,10 +787,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - dreq->flags = 0; + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) dreq->error = hdr->error; - } if (dreq->error == 0) { nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { From 967f650f8835012e9d8bf96bdace29a6b84b43db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Jan 2018 15:15:34 -0800 Subject: [PATCH 420/705] NFS: reject request for id_legacy key without auxdata commit 49686cbbb3ebafe42e63868222f269d8053ead00 upstream. nfs_idmap_legacy_upcall() is supposed to be called with 'aux' pointing to a 'struct idmap', via the call to request_key_with_auxdata() in nfs_idmap_request_key(). However it can also be reached via the request_key() system call in which case 'aux' will be NULL, causing a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall(), assuming that the key description is valid enough to get that far. Fix this by making nfs_idmap_legacy_upcall() negate the key if no auxdata is provided. As usual, this bug was found by syzkaller. A simple reproducer using the command-line keyctl program is: keyctl request2 id_legacy uid:0 '' @s Fixes: 57e62324e469 ("NFS: Store the legacy idmapper result in the keyring") Reported-by: syzbot+5dfdbcf7b3eb5912abbb@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4idmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index c444285bb1b1..f1160cdd4682 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -567,9 +567,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; struct key *key = cons->key; - int ret = -ENOMEM; + int ret = -ENOKEY; + + if (!aux) + goto out1; /* msg and im are freed in idmap_pipe_destroy_msg */ + ret = -ENOMEM; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out1; From b79d8854ee0e222c5beb52fc50246ad0b6d9088e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Jan 2018 09:29:41 -0500 Subject: [PATCH 421/705] NFS: Fix a race between mmap() and O_DIRECT commit e231c6879cfd44e4fffd384bb6dd7d313249a523 upstream. When locking the file in order to do O_DIRECT on it, we must unmap any mmapped ranges on the pagecache so that we can flush out the dirty data. Fixes: a5864c999de67 ("NFS: Do not serialise O_DIRECT reads and writes") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/io.c b/fs/nfs/io.c index 1fc5d1ce327e..d18ccc1ce0b5 100644 --- a/fs/nfs/io.c +++ b/fs/nfs/io.c @@ -98,7 +98,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) { if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { set_bit(NFS_INO_ODIRECT, &nfsi->flags); - nfs_wb_all(inode); + nfs_sync_mapping(inode->i_mapping); } } From 058d13f85da6909c44319a21bc275f22c51451dc Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 19 Jan 2018 09:18:54 +0100 Subject: [PATCH 422/705] kernfs: fix regression in kernfs_fop_write caused by wrong type commit ba87977a49913129962af8ac35b0e13e0fa4382d upstream. Commit b7ce40cff0b9 ("kernfs: cache atomic_write_len in kernfs_open_file") changes type of local variable 'len' from ssize_t to size_t. This change caused that the *ppos value is updated also when the previous write callback failed. Mentioned snippet: ... len = ops->write(...); <- return value can be negative ... if (len > 0) <- true here in this case *ppos += len; ... Fixes: b7ce40cff0b9 ("kernfs: cache atomic_write_len in kernfs_open_file") Acked-by: Tejun Heo Signed-off-by: Ivan Vecera Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 78219d5644e9..d6512cd9ba02 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - size_t len; + ssize_t len; char *buf; if (of->atomic_write_len) { From 3332b6f3276b8d20fa0c8665529b43605d9b494b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 16:41:08 +0100 Subject: [PATCH 423/705] ahci: Annotate PCI ids for mobile Intel chipsets as such commit ca1b4974bd237f2373b0e980b11957aac3499b56 upstream. Intel uses different SATA PCI ids for the Desktop and Mobile SKUs of their chipsets. For older models the comment describing which chipset the PCI id is for, aksi indicates when we're dealing with a mobile SKU. Extend the comments for recent chipsets to also indicate mobile SKUs. The information this commit adds comes from Intel's chipset datasheets. This commit is a preparation patch for allowing a different default sata link powermanagement policy for mobile chipsets. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index c94038206c3a..15ac8c255b60 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -265,9 +265,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3b23), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH AHCI */ + { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH M AHCI */ { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ + { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH M RAID */ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */ @@ -290,9 +290,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */ - { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */ + { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT M AHCI */ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */ - { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT RAID */ + { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT M RAID */ { PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ @@ -301,20 +301,20 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */ { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */ { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */ - { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point AHCI */ + { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point M AHCI */ { PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */ - { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point RAID */ + { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point M RAID */ { PCI_VDEVICE(INTEL, 0x1e0e), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x8c02), board_ahci }, /* Lynx Point AHCI */ - { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point AHCI */ + { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point M AHCI */ { PCI_VDEVICE(INTEL, 0x8c04), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c06), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x9c02), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c03), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c04), board_ahci }, /* Lynx Point-LP RAID */ @@ -355,21 +355,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */ - { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */ + { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series M AHCI */ { PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */ - { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H M AHCI */ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */ - { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H M RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ From 72c0031a914e973c343f76bed7651308f3c31667 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 16:41:09 +0100 Subject: [PATCH 424/705] ahci: Add PCI ids for Intel Bay Trail, Cherry Trail and Apollo Lake AHCI commit 998008b779e424bd7513c434d0ab9c1268459009 upstream. Add PCI ids for Intel Bay Trail, Cherry Trail and Apollo Lake AHCI SATA controllers. This commit is a preparation patch for allowing a different default sata link powermanagement policy for mobile chipsets. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 15ac8c255b60..4cc8942817b3 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -383,6 +383,10 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci }, /* Apollo Lake AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From 016572d31d325220ab2e1b54a80a5dcda0884eaf Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 11 Jan 2018 15:55:50 +0300 Subject: [PATCH 425/705] ahci: Add Intel Cannon Lake PCH-H PCI ID commit f919dde0772a894c693a1eeabc77df69d6a9b937 upstream. Add Intel Cannon Lake PCH-H PCI ID to the list of supported controllers. Signed-off-by: Mika Westerberg Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 4cc8942817b3..9b46ef4c851e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -383,6 +383,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */ { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ From d2b492bda56046fca0ae4579c142e2d15d235156 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:22 -0800 Subject: [PATCH 426/705] crypto: hash - introduce crypto_hash_alg_has_setkey() commit cd6ed77ad5d223dc6299fb58f62e0f5267f7e2ba upstream. Templates that use an shash spawn can use crypto_shash_alg_has_setkey() to determine whether the underlying algorithm requires a key or not. But there was no corresponding function for ahash spawns. Add it. Note that the new function actually has to support both shash and ahash algorithms, since the ahash API can be used with either. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 11 +++++++++++ include/crypto/internal/hash.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/crypto/ahash.c b/crypto/ahash.c index cce0268a13fe..f3fa104de479 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -625,5 +625,16 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(ahash_attr_alg); +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg) +{ + struct crypto_alg *alg = &halg->base; + + if (alg->cra_type != &crypto_ahash_type) + return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg)); + + return __crypto_ahash_alg(alg)->setkey != NULL; +} +EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index cac57358f7af..5203560f992e 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -88,6 +88,8 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) return alg->setkey != shash_no_setkey; } +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); + int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); From c1ebf9f8354702eb229e623280f90ad5e456d75a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:23 -0800 Subject: [PATCH 427/705] crypto: cryptd - pass through absence of ->setkey() commit 841a3ff329713f796a63356fef6e2f72e4a3f6a3 upstream. When the cryptd template is used to wrap an unkeyed hash algorithm, don't install a ->setkey() method to the cryptd instance. This change is necessary for cryptd to keep working with unkeyed hash algorithms once we start enforcing that ->setkey() is called when present. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/cryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 0c654e59f215..af9ad45d1909 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -691,7 +691,8 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = cryptd_hash_finup_enqueue; inst->alg.export = cryptd_hash_export; inst->alg.import = cryptd_hash_import; - inst->alg.setkey = cryptd_hash_setkey; + if (crypto_shash_alg_has_setkey(salg)) + inst->alg.setkey = cryptd_hash_setkey; inst->alg.digest = cryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); From 45f31106baa3fb77f5bdebca824bbc2956c51b7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:24 -0800 Subject: [PATCH 428/705] crypto: mcryptd - pass through absence of ->setkey() commit fa59b92d299f2787e6bae1ff078ee0982e80211f upstream. When the mcryptd template is used to wrap an unkeyed hash algorithm, don't install a ->setkey() method to the mcryptd instance. This change is necessary for mcryptd to keep working with unkeyed hash algorithms once we start enforcing that ->setkey() is called when present. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/mcryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index a14100e74754..6e9389c8bfbd 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -534,7 +534,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = mcryptd_hash_finup_enqueue; inst->alg.export = mcryptd_hash_export; inst->alg.import = mcryptd_hash_import; - inst->alg.setkey = mcryptd_hash_setkey; + if (crypto_hash_alg_has_setkey(halg)) + inst->alg.setkey = mcryptd_hash_setkey; inst->alg.digest = mcryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); From b93728341fb72fe0cf7ab24ada232ce0273e92db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:25 -0800 Subject: [PATCH 429/705] crypto: poly1305 - remove ->setkey() method commit a16e772e664b9a261424107784804cffc8894977 upstream. Since Poly1305 requires a nonce per invocation, the Linux kernel implementations of Poly1305 don't use the crypto API's keying mechanism and instead expect the key and nonce as the first 32 bytes of the data. But ->setkey() is still defined as a stub returning an error code. This prevents Poly1305 from being used through AF_ALG and will also break it completely once we start enforcing that all crypto API users (not just AF_ALG) call ->setkey() if present. Fix it by removing crypto_poly1305_setkey(), leaving ->setkey as NULL. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/poly1305_glue.c | 1 - crypto/poly1305_generic.c | 17 +++++------------ include/crypto/poly1305.h | 2 -- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e32142bc071d..28c372003e44 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -164,7 +164,6 @@ static struct shash_alg alg = { .init = poly1305_simd_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_simd_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 2df9835dfbc0..bca99238948f 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -51,17 +51,6 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - /* Poly1305 requires a unique key for each tag, which implies that - * we can't set it on the tfm that gets accessed by multiple users - * simultaneously. Instead we expect the key as the first 32 bytes in - * the update() call. */ - return -ENOTSUPP; -} -EXPORT_SYMBOL_GPL(crypto_poly1305_setkey); - static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ @@ -80,6 +69,11 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) dctx->s[3] = le32_to_cpuvp(key + 12); } +/* + * Poly1305 requires a unique key for each tag, which implies that we can't set + * it on the tfm that gets accessed by multiple users simultaneously. Instead we + * expect the key as the first 32 bytes in the update() call. + */ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -285,7 +279,6 @@ static struct shash_alg poly1305_alg = { .init = crypto_poly1305_init, .update = crypto_poly1305_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 894df59b74e4..d586f741cab5 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -30,8 +30,6 @@ struct poly1305_desc_ctx { }; int crypto_poly1305_init(struct shash_desc *desc); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen); int crypto_poly1305_update(struct shash_desc *desc, From daaa81c48402da28cc9e32ad55c48fb05e61b005 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 19 Apr 2017 15:11:00 -0700 Subject: [PATCH 430/705] nsfs: mark dentry with DCACHE_RCUACCESS commit 073c516ff73557a8f7315066856c04b50383ac34 upstream. Andrey reported a use-after-free in __ns_get_path(): spin_lock include/linux/spinlock.h:299 [inline] lockref_get_not_dead+0x19/0x80 lib/lockref.c:179 __ns_get_path+0x197/0x860 fs/nsfs.c:66 open_related_ns+0xda/0x200 fs/nsfs.c:143 sock_ioctl+0x39d/0x440 net/socket.c:1001 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1bf/0x1780 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 We are under rcu read lock protection at that point: rcu_read_lock(); d = atomic_long_read(&ns->stashed); if (!d) goto slow; dentry = (struct dentry *)d; if (!lockref_get_not_dead(&dentry->d_lockref)) goto slow; rcu_read_unlock(); but don't use a proper RCU API on the free path, therefore a parallel __d_free() could free it at the same time. We need to mark the stashed dentry with DCACHE_RCUACCESS so that __d_free() will be called after all readers leave RCU. Fixes: e149ed2b805f ("take the targets of /proc/*/ns/* symlinks to separate fs") Cc: Alexander Viro Cc: Andrew Morton Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: Linus Torvalds Cc: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/nsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eab..80fdfad7c215 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -90,6 +90,7 @@ slow: return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { From e78d9fdf5ecce2830d76d54017c3d8531bf9b119 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:18 +0100 Subject: [PATCH 431/705] media: v4l2-ioctl.c: don't copy back the result for -ENOTTY commit 181a4a2d5a0a7b43cab08a70710d727e7764ccdd upstream. If the ioctl returned -ENOTTY, then don't bother copying back the result as there is no point. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index c52d94c018bb..4510e8a37244 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2862,8 +2862,11 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, /* Handles IOCTL */ err = func(file, cmd, parg); - if (err == -ENOIOCTLCMD) + if (err == -ENOTTY || err == -ENOIOCTLCMD) { err = -ENOTTY; + goto out; + } + if (err == 0) { if (cmd == VIDIOC_DQBUF) trace_v4l2_dqbuf(video_devdata(file)->minor, parg); From f294548da6455cae64456a9dfeff1e96390171c0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:19 +0100 Subject: [PATCH 432/705] media: v4l2-compat-ioctl32.c: add missing VIDIOC_PREPARE_BUF commit 3ee6d040719ae09110e5cdf24d5386abe5d1b776 upstream. The result of the VIDIOC_PREPARE_BUF ioctl was never copied back to userspace since it was missing in the switch. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index dc51dd86377d..049380bbf4cf 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1022,6 +1022,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar err = put_v4l2_create32(&karg.v2crt, up); break; + case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: From 02129c9bc23582a48194e89cbbeb15169115b8b9 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:20 +0100 Subject: [PATCH 433/705] media: v4l2-compat-ioctl32.c: fix the indentation commit b7b957d429f601d6d1942122b339474f31191d75 upstream. The indentation of this source is all over the place. Fix this. This patch only changes whitespace. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 208 +++++++++--------- 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 049380bbf4cf..57211c7fc491 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -48,11 +48,11 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount)) - return -EFAULT; + copy_from_user(&kp->w, &up->w, sizeof(up->w)) || + get_user(kp->field, &up->field) || + get_user(kp->chromakey, &up->chromakey) || + get_user(kp->clipcount, &up->clipcount)) + return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; if (kp->clipcount) { @@ -82,10 +82,10 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount)) - return -EFAULT; + put_user(kp->field, &up->field) || + put_user(kp->chromakey, &up->chromakey) || + put_user(kp->clipcount, &up->clipcount)) + return -EFAULT; return 0; } @@ -97,7 +97,7 @@ static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pi } static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) + struct v4l2_pix_format_mplane __user *up) { if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) return -EFAULT; @@ -112,7 +112,7 @@ static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pi } static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) + struct v4l2_pix_format_mplane __user *up) { if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) return -EFAULT; @@ -218,7 +218,7 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); + kp->type); return -EINVAL; } } @@ -265,7 +265,7 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); + kp->type); return -EINVAL; } } @@ -299,7 +299,7 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 { /* other fields are not set by the user, nor used by the driver */ if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || - get_user(kp->index, &up->index)) + get_user(kp->index, &up->index)) return -EFAULT; return 0; } @@ -307,13 +307,13 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || - copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) - return -EFAULT; + put_user(kp->index, &up->index) || + put_user(kp->id, &up->id) || + copy_to_user(up->name, kp->name, 24) || + copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || + put_user(kp->framelines, &up->framelines) || + copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) + return -EFAULT; return 0; } @@ -353,14 +353,14 @@ struct v4l2_buffer32 { }; static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) + enum v4l2_memory memory) { void __user *up_pln; compat_long_t p; if (copy_in_user(up, up32, 2 * sizeof(__u32)) || - copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + copy_in_user(&up->data_offset, &up32->data_offset, + sizeof(__u32))) return -EFAULT; if (memory == V4L2_MEMORY_USERPTR) { @@ -374,7 +374,7 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + sizeof(__u32))) return -EFAULT; } @@ -382,23 +382,23 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ } static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) + enum v4l2_memory memory) { if (copy_in_user(up32, up, 2 * sizeof(__u32)) || - copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + copy_in_user(&up32->data_offset, &up->data_offset, + sizeof(__u32))) return -EFAULT; /* For MMAP, driver might've set up the offset, so copy it back. * USERPTR stays the same (was userspace-provided), so no copying. */ if (memory == V4L2_MEMORY_MMAP) if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(__u32))) return -EFAULT; /* For DMABUF, driver might've set up the fd, so copy it back. */ if (memory == V4L2_MEMORY_DMABUF) if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + sizeof(int))) return -EFAULT; return 0; @@ -413,19 +413,19 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int ret; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) - return -EFAULT; + get_user(kp->index, &up->index) || + get_user(kp->type, &up->type) || + get_user(kp->flags, &up->flags) || + get_user(kp->memory, &up->memory) || + get_user(kp->length, &up->length)) + return -EFAULT; if (V4L2_TYPE_IS_OUTPUT(kp->type)) if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + get_user(kp->field, &up->field) || + get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + get_user(kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -442,13 +442,13 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - num_planes * sizeof(struct v4l2_plane32))) + num_planes * sizeof(struct v4l2_plane32))) return -EFAULT; /* We don't really care if userspace decides to kill itself * by passing a very big num_planes value */ uplane = compat_alloc_user_space(num_planes * - sizeof(struct v4l2_plane)); + sizeof(struct v4l2_plane)); kp->m.planes = (__force struct v4l2_plane *)uplane; while (--num_planes >= 0) { @@ -466,12 +466,12 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user break; case V4L2_MEMORY_USERPTR: { - compat_long_t tmp; + compat_long_t tmp; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; + if (get_user(tmp, &up->m.userptr)) + return -EFAULT; - kp->m.userptr = (unsigned long)compat_ptr(tmp); + kp->m.userptr = (unsigned long)compat_ptr(tmp); } break; case V4L2_MEMORY_OVERLAY: @@ -497,22 +497,22 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int ret; if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) - return -EFAULT; + put_user(kp->index, &up->index) || + put_user(kp->type, &up->type) || + put_user(kp->flags, &up->flags) || + put_user(kp->memory, &up->memory)) + return -EFAULT; if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) - return -EFAULT; + put_user(kp->field, &up->field) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || + copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || + put_user(kp->sequence, &up->sequence) || + put_user(kp->reserved2, &up->reserved2) || + put_user(kp->reserved, &up->reserved) || + put_user(kp->length, &up->length)) + return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { num_planes = kp->length; @@ -576,11 +576,11 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame u32 tmp; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || - get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) - return -EFAULT; + get_user(tmp, &up->base) || + get_user(kp->capability, &up->capability) || + get_user(kp->flags, &up->flags) || + copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) + return -EFAULT; kp->base = (__force void *)compat_ptr(tmp); return 0; } @@ -590,11 +590,11 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame u32 tmp = (u32)((unsigned long)kp->base); if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) - return -EFAULT; + put_user(tmp, &up->base) || + put_user(kp->capability, &up->capability) || + put_user(kp->flags, &up->flags) || + copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) + return -EFAULT; return 0; } @@ -669,12 +669,12 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) - return -EFAULT; + get_user(kp->which, &up->which) || + get_user(kp->count, &up->count) || + get_user(kp->error_idx, &up->error_idx) || + copy_from_user(kp->reserved, up->reserved, + sizeof(kp->reserved))) + return -EFAULT; n = kp->count; if (n == 0) { kp->controls = NULL; @@ -684,7 +684,7 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; ucontrols = compat_ptr(p); if (!access_ok(VERIFY_READ, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + n * sizeof(struct v4l2_ext_control32))) return -EFAULT; kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control)); kp->controls = (__force struct v4l2_ext_control *)kcontrols; @@ -719,11 +719,11 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext compat_caddr_t p; if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + put_user(kp->which, &up->which) || + put_user(kp->count, &up->count) || + put_user(kp->error_idx, &up->error_idx) || + copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; if (!kp->count) return 0; @@ -731,7 +731,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; ucontrols = compat_ptr(p); if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + n * sizeof(struct v4l2_ext_control32))) return -EFAULT; while (--n >= 0) { @@ -769,15 +769,15 @@ struct v4l2_event32 { static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) - return -EFAULT; + put_user(kp->type, &up->type) || + copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || + put_user(kp->pending, &up->pending) || + put_user(kp->sequence, &up->sequence) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || + put_user(kp->id, &up->id) || + copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) + return -EFAULT; return 0; } @@ -794,12 +794,12 @@ static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) u32 tmp; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || - get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) - return -EFAULT; + get_user(kp->pad, &up->pad) || + get_user(kp->start_block, &up->start_block) || + get_user(kp->blocks, &up->blocks) || + get_user(tmp, &up->edid) || + copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + return -EFAULT; kp->edid = (__force u8 *)compat_ptr(tmp); return 0; } @@ -809,12 +809,12 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) u32 tmp = (u32)((unsigned long)kp->edid); if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + put_user(kp->pad, &up->pad) || + put_user(kp->start_block, &up->start_block) || + put_user(kp->blocks, &up->blocks) || + put_user(tmp, &up->edid) || + copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; return 0; } From 81e0acf07015dbd3e0b45e8f8a053d64b804bb46 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:21 +0100 Subject: [PATCH 434/705] media: v4l2-compat-ioctl32.c: move 'helper' functions to __get/put_v4l2_format32 commit 486c521510c44a04cd756a9267e7d1e271c8a4ba upstream. These helper functions do not really help. Move the code to the __get/put_v4l2_format32 functions. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 104 ++++-------------- 1 file changed, 20 insertions(+), 84 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 57211c7fc491..64bc493edd7f 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -89,78 +89,6 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return 0; } -static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sdr_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sdr_format))) - return -EFAULT; - return 0; -} - struct v4l2_format32 { __u32 type; /* enum v4l2_buf_type */ union { @@ -199,23 +127,27 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_from_user(&kp->fmt.pix, &up->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return get_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_from_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_from_user(&kp->fmt.vbi, &up->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_from_user(&kp->fmt.sliced, &up->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); @@ -246,23 +178,27 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_to_user(&up->fmt.pix, &kp->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return put_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_to_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_to_user(&up->fmt.vbi, &kp->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_to_user(&up->fmt.sliced, &kp->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); From daff4d009f4f7fb3b1f041b76c0782cb96d99d56 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:22 +0100 Subject: [PATCH 435/705] media: v4l2-compat-ioctl32.c: avoid sizeof(type) commit 333b1e9f96ce05f7498b581509bb30cde03018bf upstream. Instead of doing sizeof(struct foo) use sizeof(*up). There even were cases where 4 * sizeof(__u32) was used instead of sizeof(kp->reserved), which is very dangerous when the size of the reserved array changes. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 77 +++++++++---------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 64bc493edd7f..64e3977ab851 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -47,7 +47,7 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || get_user(kp->chromakey, &up->chromakey) || @@ -64,7 +64,7 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(struct v4l2_clip)); + kclips = compat_alloc_user_space(n * sizeof(*kclips)); kp->clips = kclips; while (--n >= 0) { if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) @@ -157,14 +157,14 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; return __get_v4l2_format32(kp, up); } static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_create_buffers32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) return -EFAULT; return __get_v4l2_format32(&kp->format, &up->format); @@ -208,14 +208,14 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; @@ -234,7 +234,7 @@ struct v4l2_standard32 { static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->index, &up->index)) return -EFAULT; return 0; @@ -242,13 +242,13 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->index, &up->index) || put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || + copy_to_user(up->name, kp->name, sizeof(up->name)) || copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) + copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return 0; } @@ -296,7 +296,7 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ if (copy_in_user(up, up32, 2 * sizeof(__u32)) || copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + sizeof(up->data_offset))) return -EFAULT; if (memory == V4L2_MEMORY_USERPTR) { @@ -306,11 +306,11 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; } else if (memory == V4L2_MEMORY_DMABUF) { - if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int))) + if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + sizeof(up32->m.mem_offset))) return -EFAULT; } @@ -322,19 +322,19 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ { if (copy_in_user(up32, up, 2 * sizeof(__u32)) || copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + sizeof(up->data_offset))) return -EFAULT; /* For MMAP, driver might've set up the offset, so copy it back. * USERPTR stays the same (was userspace-provided), so no copying. */ if (memory == V4L2_MEMORY_MMAP) if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(up->m.mem_offset))) return -EFAULT; /* For DMABUF, driver might've set up the fd, so copy it back. */ if (memory == V4L2_MEMORY_DMABUF) if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + sizeof(up->m.fd))) return -EFAULT; return 0; @@ -348,7 +348,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int num_planes; int ret; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->index, &up->index) || get_user(kp->type, &up->type) || get_user(kp->flags, &up->flags) || @@ -360,8 +360,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (get_user(kp->bytesused, &up->bytesused) || get_user(kp->field, &up->field) || get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + get_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -378,13 +377,12 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - num_planes * sizeof(struct v4l2_plane32))) + num_planes * sizeof(*uplane32))) return -EFAULT; /* We don't really care if userspace decides to kill itself * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(num_planes * - sizeof(struct v4l2_plane)); + uplane = compat_alloc_user_space(num_planes * sizeof(*uplane)); kp->m.planes = (__force struct v4l2_plane *)uplane; while (--num_planes >= 0) { @@ -432,7 +430,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int num_planes; int ret; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->index, &up->index) || put_user(kp->type, &up->type) || put_user(kp->flags, &up->flags) || @@ -443,7 +441,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user put_user(kp->field, &up->field) || put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || + copy_to_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || put_user(kp->sequence, &up->sequence) || put_user(kp->reserved2, &up->reserved2) || put_user(kp->reserved, &up->reserved) || @@ -511,7 +509,7 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame { u32 tmp; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(tmp, &up->base) || get_user(kp->capability, &up->capability) || get_user(kp->flags, &up->flags) || @@ -525,7 +523,7 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame { u32 tmp = (u32)((unsigned long)kp->base); - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(tmp, &up->base) || put_user(kp->capability, &up->capability) || put_user(kp->flags, &up->flags) || @@ -549,14 +547,14 @@ struct v4l2_input32 { Otherwise it is identical to the 32-bit version. */ static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(struct v4l2_input32))) + if (copy_from_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(struct v4l2_input32))) + if (copy_to_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -604,12 +602,11 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext int n; compat_caddr_t p; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->which, &up->which) || get_user(kp->count, &up->count) || get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) + copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; n = kp->count; if (n == 0) { @@ -619,10 +616,9 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_READ, ucontrols, n * sizeof(*ucontrols))) return -EFAULT; - kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control)); + kcontrols = compat_alloc_user_space(n * sizeof(*kcontrols)); kp->controls = (__force struct v4l2_ext_control *)kcontrols; while (--n >= 0) { u32 id; @@ -654,7 +650,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext int n = kp->count; compat_caddr_t p; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->which, &up->which) || put_user(kp->count, &up->count) || put_user(kp->error_idx, &up->error_idx) || @@ -666,8 +662,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(*ucontrols))) return -EFAULT; while (--n >= 0) { @@ -704,7 +699,7 @@ struct v4l2_event32 { static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->type, &up->type) || copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || put_user(kp->pending, &up->pending) || @@ -712,7 +707,7 @@ static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *u put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) + copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return 0; } @@ -729,7 +724,7 @@ static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) { u32 tmp; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->pad, &up->pad) || get_user(kp->start_block, &up->start_block) || get_user(kp->blocks, &up->blocks) || @@ -744,7 +739,7 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) { u32 tmp = (u32)((unsigned long)kp->edid); - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->pad, &up->pad) || put_user(kp->start_block, &up->start_block) || put_user(kp->blocks, &up->blocks) || From eec955463de3259c0db5b38952f79c3e39e03f65 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:23 +0100 Subject: [PATCH 436/705] media: v4l2-compat-ioctl32.c: copy m.userptr in put_v4l2_plane32 commit 8ed5a59dcb47a6f76034ee760b36e089f3e82529 upstream. The struct v4l2_plane32 should set m.userptr as well. The same happens in v4l2_buffer32 and v4l2-compliance tests for this. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 64e3977ab851..2ddeecdababe 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -299,19 +299,24 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ sizeof(up->data_offset))) return -EFAULT; - if (memory == V4L2_MEMORY_USERPTR) { + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: + if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, + sizeof(up32->m.mem_offset))) + return -EFAULT; + break; + case V4L2_MEMORY_USERPTR: if (get_user(p, &up32->m.userptr)) return -EFAULT; up_pln = compat_ptr(p); if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; - } else if (memory == V4L2_MEMORY_DMABUF) { + break; + case V4L2_MEMORY_DMABUF: if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; - } else { - if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(up32->m.mem_offset))) - return -EFAULT; + break; } return 0; @@ -320,22 +325,32 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { + unsigned long p; + if (copy_in_user(up32, up, 2 * sizeof(__u32)) || copy_in_user(&up32->data_offset, &up->data_offset, sizeof(up->data_offset))) return -EFAULT; - /* For MMAP, driver might've set up the offset, so copy it back. - * USERPTR stays the same (was userspace-provided), so no copying. */ - if (memory == V4L2_MEMORY_MMAP) + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, sizeof(up->m.mem_offset))) return -EFAULT; - /* For DMABUF, driver might've set up the fd, so copy it back. */ - if (memory == V4L2_MEMORY_DMABUF) + break; + case V4L2_MEMORY_USERPTR: + if (get_user(p, &up->m.userptr) || + put_user((compat_ulong_t)ptr_to_compat((__force void *)p), + &up32->m.userptr)) + return -EFAULT; + break; + case V4L2_MEMORY_DMABUF: if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; + break; + } return 0; } @@ -395,6 +410,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (get_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; @@ -408,10 +424,6 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user kp->m.userptr = (unsigned long)compat_ptr(tmp); } break; - case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) - return -EFAULT; - break; case V4L2_MEMORY_DMABUF: if (get_user(kp->m.fd, &up->m.fd)) return -EFAULT; @@ -468,6 +480,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (put_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; @@ -475,10 +488,6 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (put_user(kp->m.userptr, &up->m.userptr)) return -EFAULT; break; - case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) - return -EFAULT; - break; case V4L2_MEMORY_DMABUF: if (put_user(kp->m.fd, &up->m.fd)) return -EFAULT; From 9a7cd41be3ade82208da3a9792a0bca175f6aafc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:24 +0100 Subject: [PATCH 437/705] media: v4l2-compat-ioctl32.c: fix ctrl_is_pointer commit b8c601e8af2d08f733d74defa8465303391bb930 upstream. ctrl_is_pointer just hardcoded two known string controls, but that caused problems when using e.g. custom controls that use a pointer for the payload. Reimplement this function: it now finds the v4l2_ctrl (if the driver uses the control framework) or it calls vidioc_query_ext_ctrl (if the driver implements that directly). In both cases it can now check if the control is a pointer control or not. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 2ddeecdababe..c8dd39884f6e 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -587,24 +589,39 @@ struct v4l2_ext_control32 { }; } __attribute__ ((packed)); -/* The following function really belong in v4l2-common, but that causes - a circular dependency between modules. We need to think about this, but - for now this will do. */ - -/* Return non-zero if this control is a pointer type. Currently only - type STRING is a pointer type. */ -static inline int ctrl_is_pointer(u32 id) +/* Return true if this control is a pointer type. */ +static inline bool ctrl_is_pointer(struct file *file, u32 id) { - switch (id) { - case V4L2_CID_RDS_TX_PS_NAME: - case V4L2_CID_RDS_TX_RADIO_TEXT: - return 1; - default: - return 0; + struct video_device *vdev = video_devdata(file); + struct v4l2_fh *fh = NULL; + struct v4l2_ctrl_handler *hdl = NULL; + struct v4l2_query_ext_ctrl qec = { id }; + const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops; + + if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags)) + fh = file->private_data; + + if (fh && fh->ctrl_handler) + hdl = fh->ctrl_handler; + else if (vdev->ctrl_handler) + hdl = vdev->ctrl_handler; + + if (hdl) { + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, id); + + return ctrl && ctrl->is_ptr; } + + if (!ops->vidioc_query_ext_ctrl) + return false; + + return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && + (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); } -static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int get_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; @@ -636,7 +653,7 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; if (get_user(id, &kcontrols->id)) return -EFAULT; - if (ctrl_is_pointer(id)) { + if (ctrl_is_pointer(file, id)) { void __user *s; if (get_user(p, &ucontrols->string)) @@ -651,7 +668,9 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return 0; } -static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int put_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols = @@ -683,7 +702,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext /* Do not modify the pointer when copying a pointer control. The contents of the pointer was changed, not the pointer itself. */ - if (ctrl_is_pointer(id)) + if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; @@ -897,7 +916,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(&karg.v2ecs, up); + err = get_v4l2_ext_controls32(file, &karg.v2ecs, up); compatible_arg = 0; break; case VIDIOC_DQEVENT: @@ -924,7 +943,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(&karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, &karg.v2ecs, up)) err = -EFAULT; break; } From 8465657a3be4421d564b01cc92188db9b4b06dc1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:25 +0100 Subject: [PATCH 438/705] media: v4l2-compat-ioctl32.c: make ctrl_is_pointer work for subdevs commit 273caa260035c03d89ad63d72d8cd3d9e5c5e3f1 upstream. If the device is of type VFL_TYPE_SUBDEV then vdev->ioctl_ops is NULL so the 'if (!ops->vidioc_query_ext_ctrl)' check would crash. Add a test for !ops to the condition. All sub-devices that have controls will use the control framework, so they do not have an equivalent to ops->vidioc_query_ext_ctrl. Returning false if ops is NULL is the correct thing to do here. Fixes: b8c601e8af ("v4l2-compat-ioctl32.c: fix ctrl_is_pointer") Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Reported-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index c8dd39884f6e..da55322bbb0f 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -612,7 +612,7 @@ static inline bool ctrl_is_pointer(struct file *file, u32 id) return ctrl && ctrl->is_ptr; } - if (!ops->vidioc_query_ext_ctrl) + if (!ops || !ops->vidioc_query_ext_ctrl) return false; return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && From 55e3f3e6846c5cef80a66c7a0c4b5ae9c2888224 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Wed, 14 Feb 2018 12:48:26 +0100 Subject: [PATCH 439/705] media: v4l2-compat-ioctl32: Copy v4l2_window->global_alpha commit 025a26fa14f8fd55d50ab284a30c016a5be953d0 upstream. Commit b2787845fb91 ("V4L/DVB (5289): Add support for video output overlays.") added the field global_alpha to struct v4l2_window but did not update the compat layer accordingly. This change adds global_alpha to struct v4l2_window32 and copies the value for global_alpha back and forth. Signed-off-by: Daniel Mentz Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index da55322bbb0f..c32feb94b3e5 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -45,6 +45,7 @@ struct v4l2_window32 { compat_caddr_t clips; /* actually struct v4l2_clip32 * */ __u32 clipcount; compat_caddr_t bitmap; + __u8 global_alpha; }; static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) @@ -53,7 +54,8 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount)) + get_user(kp->clipcount, &up->clipcount) || + get_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; @@ -86,7 +88,8 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || put_user(kp->field, &up->field) || put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount)) + put_user(kp->clipcount, &up->clipcount) || + put_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; return 0; } From 30ac343c422270df7cedb4513243196412dee3b2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:27 +0100 Subject: [PATCH 440/705] media: v4l2-compat-ioctl32.c: copy clip list in put_v4l2_window32 commit a751be5b142ef6bcbbb96d9899516f4d9c8d0ef4 upstream. put_v4l2_window32() didn't copy back the clip list to userspace. Drivers can update the clip rectangles, so this should be done. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index c32feb94b3e5..3b5f3c8956f2 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -50,6 +50,11 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { + struct v4l2_clip32 __user *uclips; + struct v4l2_clip __user *kclips; + compat_caddr_t p; + u32 n; + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || @@ -59,38 +64,54 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; - if (kp->clipcount) { - struct v4l2_clip32 __user *uclips; - struct v4l2_clip __user *kclips; - int n = kp->clipcount; - compat_caddr_t p; - - if (get_user(p, &up->clips)) - return -EFAULT; - uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(*kclips)); - kp->clips = kclips; - while (--n >= 0) { - if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) - return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) - return -EFAULT; - uclips += 1; - kclips += 1; - } - } else + if (!kp->clipcount) { kp->clips = NULL; + return 0; + } + + n = kp->clipcount; + if (get_user(p, &up->clips)) + return -EFAULT; + uclips = compat_ptr(p); + kclips = compat_alloc_user_space(n * sizeof(*kclips)); + kp->clips = kclips; + while (n--) { + if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) + return -EFAULT; + if (put_user(n ? kclips + 1 : NULL, &kclips->next)) + return -EFAULT; + uclips++; + kclips++; + } return 0; } static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { + struct v4l2_clip __user *kclips = kp->clips; + struct v4l2_clip32 __user *uclips; + u32 n = kp->clipcount; + compat_caddr_t p; + if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || put_user(kp->field, &up->field) || put_user(kp->chromakey, &up->chromakey) || put_user(kp->clipcount, &up->clipcount) || put_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; + + if (!kp->clipcount) + return 0; + + if (get_user(p, &up->clips)) + return -EFAULT; + uclips = compat_ptr(p); + while (n--) { + if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) + return -EFAULT; + uclips++; + kclips++; + } return 0; } From 30dcb0756b0f026324b8f1537d01c6c9d6ca92fc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:28 +0100 Subject: [PATCH 441/705] media: v4l2-compat-ioctl32.c: drop pr_info for unknown buffer type commit 169f24ca68bf0f247d111aef07af00dd3a02ae88 upstream. There is nothing wrong with using an unknown buffer type. So stop spamming the kernel log whenever this happens. The kernel will just return -EINVAL to signal this. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 3b5f3c8956f2..75653d756dd9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -175,8 +175,6 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } @@ -226,8 +224,6 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } From 437c4ec62efbc6704e95cfe8a05103708a02b9d7 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:48:29 +0100 Subject: [PATCH 442/705] media: v4l2-compat-ioctl32.c: don't copy back the result for certain errors commit d83a8243aaefe62ace433e4384a4f077bed86acb upstream. Some ioctls need to copy back the result even if the ioctl returned an error. However, don't do this for the error code -ENOTTY. It makes no sense in that cases. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 75653d756dd9..a40f40f65fc9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -956,6 +956,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar set_fs(old_fs); } + if (err == -ENOTTY) + return err; + /* Special case: even after an error we need to put the results back for these ioctls since the error_idx will contain information on which control failed. */ From f2d4bed9eabf1636adbf538865e06876e18807e8 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Wed, 14 Feb 2018 12:48:30 +0100 Subject: [PATCH 443/705] media: v4l2-compat-ioctl32.c: refactor compat ioctl32 logic commit a1dfb4c48cc1e64eeb7800a27c66a6f7e88d075a upstream. The 32-bit compat v4l2 ioctl handling is implemented based on its 64-bit equivalent. It converts 32-bit data structures into its 64-bit equivalents and needs to provide the data to the 64-bit ioctl in user space memory which is commonly allocated using compat_alloc_user_space(). However, due to how that function is implemented, it can only be called a single time for every syscall invocation. Supposedly to avoid this limitation, the existing code uses a mix of memory from the kernel stack and memory allocated through compat_alloc_user_space(). Under normal circumstances, this would not work, because the 64-bit ioctl expects all pointers to point to user space memory. As a workaround, set_fs(KERNEL_DS) is called to temporarily disable this extra safety check and allow kernel pointers. However, this might introduce a security vulnerability: The result of the 32-bit to 64-bit conversion is writeable by user space because the output buffer has been allocated via compat_alloc_user_space(). A malicious user space process could then manipulate pointers inside this output buffer, and due to the previous set_fs(KERNEL_DS) call, functions like get_user() or put_user() no longer prevent kernel memory access. The new approach is to pre-calculate the total amount of user space memory that is needed, allocate it using compat_alloc_user_space() and then divide up the allocated memory to accommodate all data structures that need to be converted. An alternative approach would have been to retain the union type karg that they allocated on the kernel stack in do_video_ioctl(), copy all data from user space into karg and then back to user space. However, we decided against this approach because it does not align with other compat syscall implementations. Instead, we tried to replicate the get_user/put_user pairs as found in other places in the kernel: if (get_user(clipcount, &up->clipcount) || put_user(clipcount, &kp->clipcount)) return -EFAULT; Notes from hans.verkuil@cisco.com: This patch was taken from: https://github.com/LineageOS/android_kernel_samsung_apq8084/commit/97b733953c06e4f0398ade18850f0817778255f7 Clearly nobody could be bothered to upstream this patch or at minimum tell us :-( We only heard about this a week ago. This patch was rebased and cleaned up. Compared to the original I also swapped the order of the convert_in_user arguments so that they matched copy_in_user. It was hard to review otherwise. I also replaced the ALLOC_USER_SPACE/ALLOC_AND_GET by a normal function. Fixes: 6b5a9492ca ("v4l: introduce string control support.") Signed-off-by: Daniel Mentz Co-developed-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 776 +++++++++++------- 1 file changed, 499 insertions(+), 277 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index a40f40f65fc9..48a39222fdf9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -22,6 +22,14 @@ #include #include +/* Use the same argument order as copy_in_user */ +#define assign_in_user(to, from) \ +({ \ + typeof(*from) __assign_tmp; \ + \ + get_user(__assign_tmp, from) || put_user(__assign_tmp, to); \ +}) + static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret = -ENOIOCTLCMD; @@ -35,12 +43,12 @@ static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct v4l2_clip32 { struct v4l2_rect c; - compat_caddr_t next; + compat_caddr_t next; }; struct v4l2_window32 { struct v4l2_rect w; - __u32 field; /* enum v4l2_field */ + __u32 field; /* enum v4l2_field */ __u32 chromakey; compat_caddr_t clips; /* actually struct v4l2_clip32 * */ __u32 clipcount; @@ -48,37 +56,41 @@ struct v4l2_window32 { __u8 global_alpha; }; -static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) +static int get_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_clip32 __user *uclips; struct v4l2_clip __user *kclips; compat_caddr_t p; - u32 n; + u32 clipcount; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount) || - get_user(kp->global_alpha, &up->global_alpha)) + copy_in_user(&kp->w, &up->w, sizeof(up->w)) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->chromakey, &up->chromakey) || + assign_in_user(&kp->global_alpha, &up->global_alpha) || + get_user(clipcount, &up->clipcount) || + put_user(clipcount, &kp->clipcount)) return -EFAULT; - if (kp->clipcount > 2048) + if (clipcount > 2048) return -EINVAL; - if (!kp->clipcount) { - kp->clips = NULL; - return 0; - } + if (!clipcount) + return put_user(NULL, &kp->clips); - n = kp->clipcount; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(*kclips)); - kp->clips = kclips; - while (n--) { + if (aux_space < clipcount * sizeof(*kclips)) + return -EFAULT; + kclips = aux_buf; + if (put_user(kclips, &kp->clips)) + return -EFAULT; + + while (clipcount--) { if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) + if (put_user(clipcount ? kclips + 1 : NULL, &kclips->next)) return -EFAULT; uclips++; kclips++; @@ -86,27 +98,28 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return 0; } -static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) +static int put_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up) { struct v4l2_clip __user *kclips = kp->clips; struct v4l2_clip32 __user *uclips; - u32 n = kp->clipcount; compat_caddr_t p; + u32 clipcount; - if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount) || - put_user(kp->global_alpha, &up->global_alpha)) + if (copy_in_user(&up->w, &kp->w, sizeof(kp->w)) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->chromakey, &kp->chromakey) || + assign_in_user(&up->global_alpha, &kp->global_alpha) || + get_user(clipcount, &kp->clipcount) || + put_user(clipcount, &up->clipcount)) return -EFAULT; - - if (!kp->clipcount) + if (!clipcount) return 0; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - while (n--) { + while (clipcount--) { if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) return -EFAULT; uclips++; @@ -145,101 +158,158 @@ struct v4l2_create_buffers32 { __u32 reserved[8]; }; -static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int __bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) { - if (get_user(kp->type, &up->type)) + u32 type; + + if (get_user(type, &up->type)) return -EFAULT; - switch (kp->type) { - case V4L2_BUF_TYPE_VIDEO_CAPTURE: - case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return copy_from_user(&kp->fmt.pix, &up->fmt.pix, - sizeof(kp->fmt.pix)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: - case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return copy_from_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, - sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; + switch (type) { case V4L2_BUF_TYPE_VIDEO_OVERLAY: - case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); - case V4L2_BUF_TYPE_VBI_CAPTURE: - case V4L2_BUF_TYPE_VBI_OUTPUT: - return copy_from_user(&kp->fmt.vbi, &up->fmt.vbi, - sizeof(kp->fmt.vbi)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: - case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return copy_from_user(&kp->fmt.sliced, &up->fmt.sliced, - sizeof(kp->fmt.sliced)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_SDR_CAPTURE: - case V4L2_BUF_TYPE_SDR_OUTPUT: - return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, - sizeof(kp->fmt.sdr)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: { + u32 clipcount; + + if (get_user(clipcount, &up->fmt.win.clipcount)) + return -EFAULT; + if (clipcount > 2048) + return -EINVAL; + *size = clipcount * sizeof(struct v4l2_clip); + return 0; + } default: - return -EINVAL; + *size = 0; + return 0; } } -static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) { if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; - return __get_v4l2_format32(kp, up); + return __bufsize_v4l2_format(up, size); } -static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int __get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) { - if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) - return -EFAULT; - return __get_v4l2_format32(&kp->format, &up->format); -} + u32 type; -static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) -{ - if (put_user(kp->type, &up->type)) + if (get_user(type, &up->type) || put_user(type, &kp->type)) return -EFAULT; - switch (kp->type) { + switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return copy_to_user(&up->fmt.pix, &kp->fmt.pix, + return copy_in_user(&kp->fmt.pix, &up->fmt.pix, sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return copy_to_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + return copy_in_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); + return get_v4l2_window32(&kp->fmt.win, &up->fmt.win, + aux_buf, aux_space); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return copy_to_user(&up->fmt.vbi, &kp->fmt.vbi, + return copy_in_user(&kp->fmt.vbi, &up->fmt.vbi, sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return copy_to_user(&up->fmt.sliced, &kp->fmt.sliced, + return copy_in_user(&kp->fmt.sliced, &up->fmt.sliced, sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, + return copy_in_user(&kp->fmt.sdr, &up->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; default: return -EINVAL; } } -static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up))) + return -EFAULT; + return __get_v4l2_format32(kp, up, aux_buf, aux_space); +} + +static int bufsize_v4l2_create(struct v4l2_create_buffers32 __user *up, + u32 *size) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up))) + return -EFAULT; + return __bufsize_v4l2_format(&up->format, size); +} + +static int get_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + copy_in_user(kp, up, + offsetof(struct v4l2_create_buffers32, format))) + return -EFAULT; + return __get_v4l2_format32(&kp->format, &up->format, + aux_buf, aux_space); +} + +static int __put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) +{ + u32 type; + + if (get_user(type, &kp->type)) + return -EFAULT; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + return copy_in_user(&up->fmt.pix, &kp->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: + return copy_in_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_VIDEO_OVERLAY: + case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: + return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); + case V4L2_BUF_TYPE_VBI_CAPTURE: + case V4L2_BUF_TYPE_VBI_OUTPUT: + return copy_in_user(&up->fmt.vbi, &kp->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: + case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: + return copy_in_user(&up->fmt.sliced, &kp->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_SDR_CAPTURE: + case V4L2_BUF_TYPE_SDR_OUTPUT: + return copy_in_user(&up->fmt.sdr, &kp->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; + default: + return -EINVAL; + } +} + +static int put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } -static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int put_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + copy_in_user(up, kp, + offsetof(struct v4l2_create_buffers32, format)) || + copy_in_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return __put_v4l2_format32(&kp->format, &up->format); } @@ -253,24 +323,27 @@ struct v4l2_standard32 { __u32 reserved[4]; }; -static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int get_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->index, &up->index)) + assign_in_user(&kp->index, &up->index)) return -EFAULT; return 0; } -static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int put_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, sizeof(up->name)) || - copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + assign_in_user(&up->index, &kp->index) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->name, kp->name, sizeof(up->name)) || + copy_in_user(&up->frameperiod, &kp->frameperiod, + sizeof(up->frameperiod)) || + assign_in_user(&up->framelines, &kp->framelines) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -310,11 +383,11 @@ struct v4l2_buffer32 { __u32 reserved; }; -static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, +static int get_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { - void __user *up_pln; - compat_long_t p; + compat_ulong_t p; if (copy_in_user(up, up32, 2 * sizeof(__u32)) || copy_in_user(&up->data_offset, &up32->data_offset, @@ -329,10 +402,8 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (get_user(p, &up32->m.userptr)) - return -EFAULT; - up_pln = compat_ptr(p); - if (put_user((unsigned long)up_pln, &up->m.userptr)) + if (get_user(p, &up32->m.userptr) || + put_user((unsigned long)compat_ptr(p), &up->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: @@ -344,7 +415,8 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return 0; } -static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, +static int put_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { unsigned long p; @@ -368,8 +440,7 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(up->m.fd))) + if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; break; } @@ -377,37 +448,75 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return 0; } -static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int bufsize_v4l2_buffer(struct v4l2_buffer32 __user *up, u32 *size) { + u32 type; + u32 length; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(type, &up->type) || + get_user(length, &up->length)) + return -EFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + if (length > VIDEO_MAX_PLANES) + return -EINVAL; + + /* + * We don't really care if userspace decides to kill itself + * by passing a very big length value + */ + *size = length * sizeof(struct v4l2_plane); + } else { + *size = 0; + } + return 0; +} + +static int get_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) + assign_in_user(&kp->index, &up->index) || + get_user(type, &up->type) || + put_user(type, &kp->type) || + assign_in_user(&kp->flags, &up->flags) || + get_user(memory, &up->memory) || + put_user(memory, &kp->memory) || + get_user(length, &up->length) || + put_user(length, &kp->length)) return -EFAULT; - if (V4L2_TYPE_IS_OUTPUT(kp->type)) - if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec)) + if (V4L2_TYPE_IS_OUTPUT(type)) + if (assign_in_user(&kp->bytesused, &up->bytesused) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->timestamp.tv_sec, + &up->timestamp.tv_sec) || + assign_in_user(&kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; + if (num_planes == 0) { - kp->m.planes = NULL; - /* num_planes == 0 is legal, e.g. when userspace doesn't - * need planes array on DQBUF*/ - return 0; + /* + * num_planes == 0 is legal, e.g. when userspace doesn't + * need planes array on DQBUF + */ + return put_user(NULL, &kp->m.planes); } + if (num_planes > VIDEO_MAX_PLANES) + return -EINVAL; if (get_user(p, &up->m.planes)) return -EFAULT; @@ -417,37 +526,43 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user num_planes * sizeof(*uplane32))) return -EFAULT; - /* We don't really care if userspace decides to kill itself - * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(num_planes * sizeof(*uplane)); - kp->m.planes = (__force struct v4l2_plane *)uplane; + /* + * We don't really care if userspace decides to kill itself + * by passing a very big num_planes value + */ + if (aux_space < num_planes * sizeof(*uplane)) + return -EFAULT; - while (--num_planes >= 0) { - ret = get_v4l2_plane32(uplane, uplane32, kp->memory); + uplane = aux_buf; + if (put_user((__force struct v4l2_plane *)uplane, + &kp->m.planes)) + return -EFAULT; + + while (num_planes--) { + ret = get_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; - ++uplane; - ++uplane32; + uplane++; + uplane32++; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&kp->m.offset, &up->m.offset)) return -EFAULT; break; - case V4L2_MEMORY_USERPTR: - { - compat_long_t tmp; + case V4L2_MEMORY_USERPTR: { + compat_ulong_t userptr; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; - - kp->m.userptr = (unsigned long)compat_ptr(tmp); - } + if (get_user(userptr, &up->m.userptr) || + put_user((unsigned long)compat_ptr(userptr), + &kp->m.userptr)) + return -EFAULT; break; + } case V4L2_MEMORY_DMABUF: - if (get_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&kp->m.fd, &up->m.fd)) return -EFAULT; break; } @@ -456,62 +571,70 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user return 0; } -static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int put_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up) { + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) + assign_in_user(&up->index, &kp->index) || + get_user(type, &kp->type) || + put_user(type, &up->type) || + assign_in_user(&up->flags, &kp->flags) || + get_user(memory, &kp->memory) || + put_user(memory, &up->memory)) return -EFAULT; - if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) + if (assign_in_user(&up->bytesused, &kp->bytesused) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_usec, &kp->timestamp.tv_usec) || + copy_in_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->reserved2, &kp->reserved2) || + assign_in_user(&up->reserved, &kp->reserved) || + get_user(length, &kp->length) || + put_user(length, &up->length)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; + if (num_planes == 0) return 0; - uplane = (__force struct v4l2_plane __user *)kp->m.planes; + if (get_user(uplane, ((__force struct v4l2_plane __user **)&kp->m.planes))) + return -EFAULT; if (get_user(p, &up->m.planes)) return -EFAULT; uplane32 = compat_ptr(p); - while (--num_planes >= 0) { - ret = put_v4l2_plane32(uplane, uplane32, kp->memory); + while (num_planes--) { + ret = put_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; ++uplane; ++uplane32; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&up->m.offset, &kp->m.offset)) return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (put_user(kp->m.userptr, &up->m.userptr)) + if (assign_in_user(&up->m.userptr, &kp->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (put_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&up->m.fd, &kp->m.fd)) return -EFAULT; break; } @@ -523,7 +646,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user struct v4l2_framebuffer32 { __u32 capability; __u32 flags; - compat_caddr_t base; + compat_caddr_t base; struct { __u32 width; __u32 height; @@ -536,29 +659,32 @@ struct v4l2_framebuffer32 { } fmt; }; -static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int get_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp; + compat_caddr_t tmp; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) + put_user((__force void *)compat_ptr(tmp), &kp->base) || + assign_in_user(&kp->capability, &up->capability) || + assign_in_user(&kp->flags, &up->flags) || + copy_in_user(&kp->fmt, &up->fmt, sizeof(kp->fmt))) return -EFAULT; - kp->base = (__force void *)compat_ptr(tmp); return 0; } -static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int put_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->base); + void *base; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) + get_user(base, &kp->base) || + put_user(ptr_to_compat(base), &up->base) || + assign_in_user(&up->capability, &kp->capability) || + assign_in_user(&up->flags, &kp->flags) || + copy_in_user(&up->fmt, &kp->fmt, sizeof(kp->fmt))) return -EFAULT; return 0; } @@ -571,21 +697,26 @@ struct v4l2_input32 { __u32 tuner; /* Associated tuner */ compat_u64 std; __u32 status; - __u32 reserved[4]; + __u32 capabilities; + __u32 reserved[3]; }; -/* The 64-bit v4l2_input struct has extra padding at the end of the struct. - Otherwise it is identical to the 32-bit version. */ -static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +/* + * The 64-bit v4l2_input struct has extra padding at the end of the struct. + * Otherwise it is identical to the 32-bit version. + */ +static inline int get_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(*up))) + if (copy_in_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } -static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +static inline int put_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(*up))) + if (copy_in_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -639,40 +770,64 @@ static inline bool ctrl_is_pointer(struct file *file, u32 id) (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); } +static int bufsize_v4l2_ext_controls(struct v4l2_ext_controls32 __user *up, + u32 *size) +{ + u32 count; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(count, &up->count)) + return -EFAULT; + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; + *size = count * sizeof(struct v4l2_ext_control); + return 0; +} + static int get_v4l2_ext_controls32(struct file *file, - struct v4l2_ext_controls *kp, - struct v4l2_ext_controls32 __user *up) + struct v4l2_ext_controls __user *kp, + struct v4l2_ext_controls32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; - int n; + u32 count; + u32 n; compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + assign_in_user(&kp->which, &up->which) || + get_user(count, &up->count) || + put_user(count, &kp->count) || + assign_in_user(&kp->error_idx, &up->error_idx) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; - n = kp->count; - if (n == 0) { - kp->controls = NULL; - return 0; - } + + if (count == 0) + return put_user(NULL, &kp->controls); + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, n * sizeof(*ucontrols))) + if (!access_ok(VERIFY_READ, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - kcontrols = compat_alloc_user_space(n * sizeof(*kcontrols)); - kp->controls = (__force struct v4l2_ext_control *)kcontrols; - while (--n >= 0) { + if (aux_space < count * sizeof(*kcontrols)) + return -EFAULT; + kcontrols = aux_buf; + if (put_user((__force struct v4l2_ext_control *)kcontrols, + &kp->controls)) + return -EFAULT; + + for (n = 0; n < count; n++) { u32 id; if (copy_in_user(kcontrols, ucontrols, sizeof(*ucontrols))) return -EFAULT; + if (get_user(id, &kcontrols->id)) return -EFAULT; + if (ctrl_is_pointer(file, id)) { void __user *s; @@ -689,43 +844,54 @@ static int get_v4l2_ext_controls32(struct file *file, } static int put_v4l2_ext_controls32(struct file *file, - struct v4l2_ext_controls *kp, + struct v4l2_ext_controls __user *kp, struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; - struct v4l2_ext_control __user *kcontrols = - (__force struct v4l2_ext_control __user *)kp->controls; - int n = kp->count; + struct v4l2_ext_control __user *kcontrols; + u32 count; + u32 n; compat_caddr_t p; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + assign_in_user(&up->which, &kp->which) || + get_user(count, &kp->count) || + put_user(count, &up->count) || + assign_in_user(&up->error_idx, &kp->error_idx) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)) || + get_user(kcontrols, &kp->controls)) return -EFAULT; - if (!kp->count) - return 0; + if (!count) + return 0; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(*ucontrols))) + if (!access_ok(VERIFY_WRITE, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - while (--n >= 0) { - unsigned size = sizeof(*ucontrols); + for (n = 0; n < count; n++) { + unsigned int size = sizeof(*ucontrols); u32 id; - if (get_user(id, &kcontrols->id)) + if (get_user(id, &kcontrols->id) || + put_user(id, &ucontrols->id) || + assign_in_user(&ucontrols->size, &kcontrols->size) || + copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2, + sizeof(ucontrols->reserved2))) return -EFAULT; - /* Do not modify the pointer when copying a pointer control. - The contents of the pointer was changed, not the pointer - itself. */ + + /* + * Do not modify the pointer when copying a pointer control. + * The contents of the pointer was changed, not the pointer + * itself. + */ if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); + if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; + ucontrols++; kcontrols++; } @@ -745,17 +911,18 @@ struct v4l2_event32 { __u32 reserved[8]; }; -static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) +static int put_v4l2_event32(struct v4l2_event __user *kp, + struct v4l2_event32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + assign_in_user(&up->type, &kp->type) || + copy_in_user(&up->u, &kp->u, sizeof(kp->u)) || + assign_in_user(&up->pending, &kp->pending) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_nsec, &kp->timestamp.tv_nsec) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -768,31 +935,34 @@ struct v4l2_edid32 { compat_caddr_t edid; }; -static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int get_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp; + compat_uptr_t tmp; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || + assign_in_user(&kp->pad, &up->pad) || + assign_in_user(&kp->start_block, &up->start_block) || + assign_in_user(&kp->blocks, &up->blocks) || get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + put_user(compat_ptr(tmp), &kp->edid) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; - kp->edid = (__force u8 *)compat_ptr(tmp); return 0; } -static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int put_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->edid); + void *edid; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + assign_in_user(&up->pad, &kp->pad) || + assign_in_user(&up->start_block, &kp->start_block) || + assign_in_user(&up->blocks, &kp->blocks) || + get_user(edid, &kp->edid) || + put_user(ptr_to_compat(edid), &up->edid) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -809,7 +979,7 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_ENUMINPUT32 _IOWR('V', 26, struct v4l2_input32) #define VIDIOC_G_EDID32 _IOWR('V', 40, struct v4l2_edid32) #define VIDIOC_S_EDID32 _IOWR('V', 41, struct v4l2_edid32) -#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) +#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) #define VIDIOC_G_EXT_CTRLS32 _IOWR('V', 71, struct v4l2_ext_controls32) #define VIDIOC_S_EXT_CTRLS32 _IOWR('V', 72, struct v4l2_ext_controls32) #define VIDIOC_TRY_EXT_CTRLS32 _IOWR('V', 73, struct v4l2_ext_controls32) @@ -825,22 +995,23 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_G_OUTPUT32 _IOR ('V', 46, s32) #define VIDIOC_S_OUTPUT32 _IOWR('V', 47, s32) +static int alloc_userspace(unsigned int size, u32 aux_space, + void __user **up_native) +{ + *up_native = compat_alloc_user_space(size + aux_space); + if (!*up_native) + return -ENOMEM; + if (clear_user(*up_native, size)) + return -EFAULT; + return 0; +} + static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - union { - struct v4l2_format v2f; - struct v4l2_buffer v2b; - struct v4l2_framebuffer v2fb; - struct v4l2_input v2i; - struct v4l2_standard v2s; - struct v4l2_ext_controls v2ecs; - struct v4l2_event v2ev; - struct v4l2_create_buffers v2crt; - struct v4l2_edid v2edid; - unsigned long vx; - int vi; - } karg; void __user *up = compat_ptr(arg); + void __user *up_native = NULL; + void __user *aux_buf; + u32 aux_space; int compatible_arg = 1; long err = 0; @@ -879,30 +1050,52 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_STREAMOFF: case VIDIOC_S_INPUT: case VIDIOC_S_OUTPUT: - err = get_user(karg.vi, (s32 __user *)up); + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); + if (!err && assign_in_user((unsigned int __user *)up_native, + (compat_uint_t __user *)up)) + err = -EFAULT; compatible_arg = 0; break; case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); compatible_arg = 0; break; case VIDIOC_G_EDID: case VIDIOC_S_EDID: - err = get_v4l2_edid32(&karg.v2edid, up); + err = alloc_userspace(sizeof(struct v4l2_edid), 0, &up_native); + if (!err) + err = get_v4l2_edid32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = get_v4l2_format32(&karg.v2f, up); + err = bufsize_v4l2_format(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_format), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_format); + err = get_v4l2_format32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_CREATE_BUFS: - err = get_v4l2_create32(&karg.v2crt, up); + err = bufsize_v4l2_create(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_create_buffers), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_create_buffers); + err = get_v4l2_create32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; @@ -910,36 +1103,63 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = get_v4l2_buffer32(&karg.v2b, up); + err = bufsize_v4l2_buffer(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_buffer), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_buffer); + err = get_v4l2_buffer32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_S_FBUF: - err = get_v4l2_framebuffer32(&karg.v2fb, up); + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); + if (!err) + err = get_v4l2_framebuffer32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FBUF: + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); compatible_arg = 0; break; case VIDIOC_ENUMSTD: - err = get_v4l2_standard32(&karg.v2s, up); + err = alloc_userspace(sizeof(struct v4l2_standard), 0, + &up_native); + if (!err) + err = get_v4l2_standard32(up_native, up); compatible_arg = 0; break; case VIDIOC_ENUMINPUT: - err = get_v4l2_input32(&karg.v2i, up); + err = alloc_userspace(sizeof(struct v4l2_input), 0, &up_native); + if (!err) + err = get_v4l2_input32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(file, &karg.v2ecs, up); + err = bufsize_v4l2_ext_controls(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_ext_controls), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_ext_controls); + err = get_v4l2_ext_controls32(file, up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_DQEVENT: + err = alloc_userspace(sizeof(struct v4l2_event), 0, &up_native); compatible_arg = 0; break; } @@ -948,25 +1168,26 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar if (compatible_arg) err = native_ioctl(file, cmd, (unsigned long)up); - else { - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - err = native_ioctl(file, cmd, (unsigned long)&karg); - set_fs(old_fs); - } + else + err = native_ioctl(file, cmd, (unsigned long)up_native); if (err == -ENOTTY) return err; - /* Special case: even after an error we need to put the - results back for these ioctls since the error_idx will - contain information on which control failed. */ + /* + * Special case: even after an error we need to put the + * results back for these ioctls since the error_idx will + * contain information on which control failed. + */ switch (cmd) { case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(file, &karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, up_native, up)) + err = -EFAULT; + break; + case VIDIOC_S_EDID: + if (put_v4l2_edid32(up_native, up)) err = -EFAULT; break; } @@ -978,45 +1199,46 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_S_OUTPUT: case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: - err = put_user(((s32)karg.vi), (s32 __user *)up); + if (assign_in_user((compat_uint_t __user *)up, + ((unsigned int __user *)up_native))) + err = -EFAULT; break; case VIDIOC_G_FBUF: - err = put_v4l2_framebuffer32(&karg.v2fb, up); + err = put_v4l2_framebuffer32(up_native, up); break; case VIDIOC_DQEVENT: - err = put_v4l2_event32(&karg.v2ev, up); + err = put_v4l2_event32(up_native, up); break; case VIDIOC_G_EDID: - case VIDIOC_S_EDID: - err = put_v4l2_edid32(&karg.v2edid, up); + err = put_v4l2_edid32(up_native, up); break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = put_v4l2_format32(&karg.v2f, up); + err = put_v4l2_format32(up_native, up); break; case VIDIOC_CREATE_BUFS: - err = put_v4l2_create32(&karg.v2crt, up); + err = put_v4l2_create32(up_native, up); break; case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = put_v4l2_buffer32(&karg.v2b, up); + err = put_v4l2_buffer32(up_native, up); break; case VIDIOC_ENUMSTD: - err = put_v4l2_standard32(&karg.v2s, up); + err = put_v4l2_standard32(up_native, up); break; case VIDIOC_ENUMINPUT: - err = put_v4l2_input32(&karg.v2i, up); + err = put_v4l2_input32(up_native, up); break; } return err; From a96e820790253c8971ef08670c9c8dd43b619fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 5 Feb 2018 11:15:52 +0200 Subject: [PATCH 444/705] crypto: caam - fix endless loop when DECO acquire fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 225ece3e7dad4cfc44cca38ce7a3a80f255ea8f1 upstream. In case DECO0 cannot be acquired - i.e. run_descriptor_deco0() fails with -ENODEV, caam_probe() enters an endless loop: run_descriptor_deco0 ret -ENODEV -> instantiate_rng -ENODEV, overwritten by -EAGAIN ret -EAGAIN -> caam_probe -EAGAIN results in endless loop It turns out the error path in instantiate_rng() is incorrect, the checks are done in the wrong order. Fixes: 1005bccd7a4a6 ("crypto: caam - enable instantiation of all RNG4 state handles") Reported-by: Bryan O'Donoghue Suggested-by: Auer Lukas Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/ctrl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 98468b96c32f..2ca101ac0c17 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, * without any error (HW optimizations for later * CAAM eras), then try again. */ - rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; - if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) - ret = -EAGAIN; if (ret) break; + + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; + if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || + !(rdsta_val & (1 << sh_idx))) { + ret = -EAGAIN; + break; + } + dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); /* Clear the contents before recreating the descriptor */ memset(desc, 0x00, CAAM_CMD_SZ * 7); From 68f2013e1ff864dad8bd09f1d346bce5535764bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Jan 2018 00:31:27 -0800 Subject: [PATCH 445/705] crypto: sha512-mb - initialize pending lengths correctly commit eff84b379089cd8b4e83599639c1f5f6e34ef7bf upstream. The SHA-512 multibuffer code keeps track of the number of blocks pending in each lane. The minimum of these values is used to identify the next lane that will be completed. Unused lanes are set to a large number (0xFFFFFFFF) so that they don't affect this calculation. However, it was forgotten to set the lengths to this value in the initial state, where all lanes are unused. As a result it was possible for sha512_mb_mgr_get_comp_job_avx2() to select an unused lane, causing a NULL pointer dereference. Specifically this could happen in the case where ->update() was passed fewer than SHA512_BLOCK_SIZE bytes of data, so it then called sha_complete_job() without having actually submitted any blocks to the multi-buffer code. This hit a NULL pointer dereference if another task happened to have submitted blocks concurrently to the same CPU and the flush timer had not yet expired. Fix this by initializing sha512_mb_mgr->lens correctly. As usual, this bug was found by syzkaller. Fixes: 45691e2d9b18 ("crypto: sha512-mb - submit/flush routines for AVX2") Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a..d08805032f01 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) { unsigned int j; - state->lens[0] = 0; - state->lens[1] = 1; - state->lens[2] = 2; - state->lens[3] = 3; + /* initially all lanes are unused */ + state->lens[0] = 0xFFFFFFFF00000000; + state->lens[1] = 0xFFFFFFFF00000001; + state->lens[2] = 0xFFFFFFFF00000002; + state->lens[3] = 0xFFFFFFFF00000003; + state->unused_lanes = 0xFF03020100; for (j = 0; j < 4; j++) state->ldata[j].job_in_lane = NULL; From 51e22c571fadce7d016979cf88d8411ee03032ea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:06 +0000 Subject: [PATCH 446/705] arm: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls commit 20e8175d246e9f9deb377f2784b3e7dfb2ad3e86 upstream. KVM doesn't follow the SMCCC when it comes to unimplemented calls, and inject an UNDEF instead of returning an error. Since firmware calls are now used for security mitigation, they are becoming more common, and the undef is counter productive. Instead, let's follow the SMCCC which states that -1 must be returned to the caller when getting an unknown function number. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/handle_exit.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 42f5daf715d0..4e57ebca6e69 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,7 +38,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } From ba88289e7acbf609b44e27883a621d8f0c3e4d04 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 9 Nov 2017 20:27:20 +0200 Subject: [PATCH 447/705] KVM: nVMX: Fix races when sending nested PI while dest enters/leaves L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b6977117f50d60455ace86b2d256f6fb4f3de05 upstream. Consider the following scenario: 1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI to CPU B via virtual posted-interrupt mechanism. 2. CPU B is currently executing L2 guest. 3. vmx_deliver_nested_posted_interrupt() calls kvm_vcpu_trigger_posted_interrupt() which will note that vcpu->mode == IN_GUEST_MODE. 4. Assume that before CPU A sends the physical POSTED_INTR_NESTED_VECTOR IPI, CPU B exits from L2 to L0 during event-delivery (valid IDT-vectoring-info). 5. CPU A now sends the physical IPI. The IPI is received in host and it's handler (smp_kvm_posted_intr_nested_ipi()) does nothing. 6. Assume that before CPU A sets pi_pending=true and KVM_REQ_EVENT, CPU B continues to run in L0 and reach vcpu_enter_guest(). As KVM_REQ_EVENT is not set yet, vcpu_enter_guest() will continue and resume L2 guest. 7. At this point, CPU A sets pi_pending=true and KVM_REQ_EVENT but it's too late! CPU B already entered L2 and KVM_REQ_EVENT will only be consumed at next L2 entry! Another scenario to consider: 1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI to CPU B via virtual posted-interrupt mechanism. 2. Assume that before CPU A calls kvm_vcpu_trigger_posted_interrupt(), CPU B is at L0 and is about to resume into L2. Further assume that it is in vcpu_enter_guest() after check for KVM_REQ_EVENT. 3. At this point, CPU A calls kvm_vcpu_trigger_posted_interrupt() which will note that vcpu->mode != IN_GUEST_MODE. Therefore, do nothing and return false. Then, will set pi_pending=true and KVM_REQ_EVENT. 4. Now CPU B continue and resumes into L2 guest without processing the posted-interrupt until next L2 entry! To fix both issues, we just need to change vmx_deliver_nested_posted_interrupt() to set pi_pending=true and KVM_REQ_EVENT before calling kvm_vcpu_trigger_posted_interrupt(). It will fix the first scenario by chaging step (6) to note that KVM_REQ_EVENT and pi_pending=true and therefore process nested posted-interrupt. It will fix the second scenario by two possible ways: 1. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B has changed vcpu->mode to IN_GUEST_MODE, physical IPI will be sent and will be received when CPU resumes into L2. 2. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B hasn't yet changed vcpu->mode to IN_GUEST_MODE, then after CPU B will change vcpu->mode it will call kvm_request_pending() which will return true and therefore force another round of vcpu_enter_guest() which will note that KVM_REQ_EVENT and pi_pending=true and therefore process nested posted-interrupt. Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Krish Sadhukhan [Add kvm_vcpu_kick to also handle the case where L1 doesn't intercept L2 HLT and L2 executes HLT instruction. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d49da86e3099..d66224e695cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4967,14 +4967,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { - /* the PIR and ON have been set by L1. */ - kvm_vcpu_trigger_posted_interrupt(vcpu); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. */ vmx->nested.pi_pending = true; kvm_make_request(KVM_REQ_EVENT, vcpu); + /* the PIR and ON have been set by L1. */ + if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) + kvm_vcpu_kick(vcpu); return 0; } return -1; From f6741799aa5321c52a83ff5d9ce753165774d63c Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jan 2018 18:19:06 +0000 Subject: [PATCH 448/705] KVM: arm/arm64: Handle CPU_PM_ENTER_FAILED commit 58d6b15e9da5042a99c9c30ad725792e4569150e upstream. cpu_pm_enter() calls the pm notifier chain with CPU_PM_ENTER, then if there is a failure: CPU_PM_ENTER_FAILED. When KVM receives CPU_PM_ENTER it calls cpu_hyp_reset() which will return us to the hyp-stub. If we subsequently get a CPU_PM_ENTER_FAILED, KVM does nothing, leaving the CPU running with the hyp-stub, at odds with kvm_arm_hardware_enabled. Add CPU_PM_ENTER_FAILED as a fallthrough for CPU_PM_EXIT, this reloads KVM based on kvm_arm_hardware_enabled. This is safe even if CPU_PM_ENTER never gets as far as KVM, as cpu_hyp_reinit() calls cpu_hyp_reset() to make sure the hyp-stub is loaded before reloading KVM. Fixes: 67f691976662 ("arm64: kvm: allows kvm cpu hotplug") CC: Lorenzo Pieralisi Reviewed-by: Christoffer Dall Signed-off-by: James Morse Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 19b5f5c1c0ff..c38bfbeec306 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1165,6 +1165,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, cpu_hyp_reset(); return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: if (__this_cpu_read(kvm_arm_hardware_enabled)) /* The hardware was enabled before suspend. */ From 76376783a453c6893d38266cbbaed9fc0d3b7f74 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 8 Jan 2018 16:01:04 +0000 Subject: [PATCH 449/705] ASoC: rockchip: i2s: fix playback after runtime resume commit c66234cfedfc3e6e3b62563a5f2c1562be09a35d upstream. When restoring registers during runtime resume, we must not write to I2S_TXDR which is the transmit FIFO as this queues up a sample to be output and pushes all of the output channels down by one. This can be demonstrated with the speaker-test utility: for i in a b c; do speaker-test -c 2 -s 1; done which should play a test through the left speaker three times but if the I2S hardware starts runtime suspended the first sample will be played through the right speaker. Fix this by marking I2S_TXDR as volatile (which also requires marking it as readble, even though it technically isn't). This seems to be the most robust fix, the alternative of giving I2S_TXDR a default value is more fragile since it does not prevent regcache writing to the register in all circumstances. While here, also fix the configuration of I2S_RXDR and I2S_FIFOLR; these are not writable so they do not suffer from the same problem as I2S_TXDR but reading from I2S_RXDR does suffer from a similar problem. Fixes: f0447f6cbb20 ("ASoC: rockchip: i2s: restore register during runtime_suspend/resume cycle", 2016-09-07) Signed-off-by: John Keeping Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/rockchip/rockchip_i2s.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 974915cb4c4f..08bfee447a36 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -476,6 +476,7 @@ static bool rockchip_i2s_rd_reg(struct device *dev, unsigned int reg) case I2S_INTCR: case I2S_XFER: case I2S_CLR: + case I2S_TXDR: case I2S_RXDR: case I2S_FIFOLR: case I2S_INTSR: @@ -490,6 +491,9 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) switch (reg) { case I2S_INTSR: case I2S_CLR: + case I2S_FIFOLR: + case I2S_TXDR: + case I2S_RXDR: return true; default: return false; @@ -499,6 +503,8 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) static bool rockchip_i2s_precious_reg(struct device *dev, unsigned int reg) { switch (reg) { + case I2S_RXDR: + return true; default: return false; } From eb10c5973eb2bf293fdfb1421577afb8acd37555 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Jan 2018 16:38:46 +0100 Subject: [PATCH 450/705] ASoC: skl: Fix kernel warning due to zero NHTL entry commit 20a1ea2222e7cbf96e9bf8579362e971491e6aea upstream. I got the following kernel warning when loading snd-soc-skl module on Dell Latitude 7270 laptop: memremap attempted on mixed range 0x0000000000000000 size: 0x0 WARNING: CPU: 0 PID: 484 at kernel/memremap.c:98 memremap+0x8a/0x180 Call Trace: skl_nhlt_init+0x82/0xf0 [snd_soc_skl] skl_probe+0x2ee/0x7c0 [snd_soc_skl] .... It seems that the machine doesn't support the SKL DSP gives the empty NHLT entry, and it triggers the warning. For avoiding it, let do the zero check before calling memremap(). Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-nhlt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 3f8e6f0b7eb5..dcf03691ebc8 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -41,7 +41,8 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL); if (obj && obj->type == ACPI_TYPE_BUFFER) { nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer; - nhlt_table = (struct nhlt_acpi_table *) + if (nhlt_ptr->length) + nhlt_table = (struct nhlt_acpi_table *) memremap(nhlt_ptr->min_addr, nhlt_ptr->length, MEMREMAP_WB); ACPI_FREE(obj); From b7f9df60f4107d683bfda88dbc2c83039ee89ce8 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 1 Jan 2018 18:26:47 +0100 Subject: [PATCH 451/705] watchdog: imx2_wdt: restore previous timeout after suspend+resume commit 0be267255cef64e1c58475baa7b25568355a3816 upstream. When the watchdog device is suspended, its timeout is set to the maximum value. During resume, the previously set timeout should be restored. This does not work at the moment. The suspend function calls imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); and resume reverts this by calling imx2_wdt_set_timeout(wdog, wdog->timeout); However, imx2_wdt_set_timeout() updates wdog->timeout. Therefore, wdog->timeout is set to IMX2_WDT_MAX_TIME when we enter the resume function. Fix this by adding a new function __imx2_wdt_set_timeout() which only updates the hardware settings. imx2_wdt_set_timeout() now calls __imx2_wdt_set_timeout() and then saves the new timeout to wdog->timeout. During suspend, we call __imx2_wdt_set_timeout() directly so that wdog->timeout won't be updated and we can restore the previous value during resume. This approach makes wdog->timeout different from the actual setting in the hardware which is usually not a good thing. However, the two differ only while we're suspended and no kernel code is running, so it should be ok in this case. Signed-off-by: Martin Kaiser Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/imx2_wdt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 4874b0f18650..518dfa1047cb 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -169,15 +169,21 @@ static int imx2_wdt_ping(struct watchdog_device *wdog) return 0; } -static int imx2_wdt_set_timeout(struct watchdog_device *wdog, - unsigned int new_timeout) +static void __imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); - wdog->timeout = new_timeout; - regmap_update_bits(wdev->regmap, IMX2_WDT_WCR, IMX2_WDT_WCR_WT, WDOG_SEC_TO_COUNT(new_timeout)); +} + +static int imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) +{ + __imx2_wdt_set_timeout(wdog, new_timeout); + + wdog->timeout = new_timeout; return 0; } @@ -371,7 +377,11 @@ static int imx2_wdt_suspend(struct device *dev) /* The watchdog IP block is running */ if (imx2_wdt_is_running(wdev)) { - imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); + /* + * Don't update wdog->timeout, we'll restore the current value + * during resume. + */ + __imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); imx2_wdt_ping(wdog); } From d1d85ae79d5e5592dccba6890658c0999b864ddc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Nov 2017 11:55:46 -0500 Subject: [PATCH 452/705] media: dvb-frontends: fix i2c access helpers for KASAN commit 3cd890dbe2a4f14cc44c85bb6cf37e5e22d4dd0e upstream. A typical code fragment was copied across many dvb-frontend drivers and causes large stack frames when built with with CONFIG_KASAN on gcc-5/6/7: drivers/media/dvb-frontends/cxd2841er.c:3225:1: error: the frame size of 3992 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/cxd2841er.c:3404:1: error: the frame size of 3136 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv0367.c:3143:1: error: the frame size of 4016 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv090x.c:3430:1: error: the frame size of 5312 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv090x.c:4248:1: error: the frame size of 4872 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] gcc-8 now solves this by consolidating the stack slots for the argument variables, but on older compilers we can get the same behavior by taking the pointer of a local variable rather than the inline function argument. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/ascot2e.c | 4 +++- drivers/media/dvb-frontends/cxd2841er.c | 4 +++- drivers/media/dvb-frontends/helene.c | 4 +++- drivers/media/dvb-frontends/horus3a.c | 4 +++- drivers/media/dvb-frontends/itd1000.c | 5 +++-- drivers/media/dvb-frontends/mt312.c | 5 ++++- drivers/media/dvb-frontends/stb0899_drv.c | 3 ++- drivers/media/dvb-frontends/stb6100.c | 6 ++++-- drivers/media/dvb-frontends/stv0367.c | 4 +++- drivers/media/dvb-frontends/stv090x.c | 4 +++- drivers/media/dvb-frontends/stv6110x.c | 4 +++- drivers/media/dvb-frontends/zl10039.c | 4 +++- 12 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index ad304eed656d..c61227cfff25 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -155,7 +155,9 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val) { - return ascot2e_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return ascot2e_write_regs(priv, reg, &tmp, 1); } static int ascot2e_read_regs(struct ascot2e_priv *priv, diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index fd0f25ee251f..b97647cd7dc6 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -261,7 +261,9 @@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, static int cxd2841er_write_reg(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 val) { - return cxd2841er_write_regs(priv, addr, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return cxd2841er_write_regs(priv, addr, reg, &tmp, 1); } static int cxd2841er_read_regs(struct cxd2841er_priv *priv, diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c index dc43c5f6d0ea..e06bcd4b3ddc 100644 --- a/drivers/media/dvb-frontends/helene.c +++ b/drivers/media/dvb-frontends/helene.c @@ -331,7 +331,9 @@ static int helene_write_regs(struct helene_priv *priv, static int helene_write_reg(struct helene_priv *priv, u8 reg, u8 val) { - return helene_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return helene_write_regs(priv, reg, &tmp, 1); } static int helene_read_regs(struct helene_priv *priv, diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c index 0c089b5986a1..4ebddc895137 100644 --- a/drivers/media/dvb-frontends/horus3a.c +++ b/drivers/media/dvb-frontends/horus3a.c @@ -89,7 +89,9 @@ static int horus3a_write_regs(struct horus3a_priv *priv, static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val) { - return horus3a_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return horus3a_write_regs(priv, reg, &tmp, 1); } static int horus3a_enter_power_save(struct horus3a_priv *priv) diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c index cadcae4cff89..ac9d2591bb6f 100644 --- a/drivers/media/dvb-frontends/itd1000.c +++ b/drivers/media/dvb-frontends/itd1000.c @@ -99,8 +99,9 @@ static int itd1000_read_reg(struct itd1000_state *state, u8 reg) static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v) { - int ret = itd1000_write_regs(state, r, &v, 1); - state->shadow[r] = v; + u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + int ret = itd1000_write_regs(state, r, &tmp, 1); + state->shadow[r] = tmp; return ret; } diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index fc08429c99b7..7824926a3744 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -142,7 +142,10 @@ static inline int mt312_readreg(struct mt312_state *state, static inline int mt312_writereg(struct mt312_state *state, const enum mt312_reg_addr reg, const u8 val) { - return mt312_write(state, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + + return mt312_write(state, reg, &tmp, 1); } static inline u32 mt312_div(u32 a, u32 b) diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index 3d171b0e00c2..3deddbcaa8b7 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -552,7 +552,8 @@ int stb0899_write_regs(struct stb0899_state *state, unsigned int reg, u8 *data, int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data) { - return stb0899_write_regs(state, reg, &data, 1); + u8 tmp = data; + return stb0899_write_regs(state, reg, &tmp, 1); } /* diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index 5add1182c3ca..4746b1e0d637 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -226,12 +226,14 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data) { + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + if (unlikely(reg >= STB6100_NUMREGS)) { dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg); return -EREMOTEIO; } - data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set; - return stb6100_write_reg_range(state, &data, reg, 1); + tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set; + return stb6100_write_reg_range(state, &tmp, reg, 1); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index abc379aea713..94cec81d0a5c 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -804,7 +804,9 @@ int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - return stv0367_writeregs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv0367_writeregs(state, reg, &tmp, 1); } static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 25bdf6e0f963..f0377e2b341b 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -761,7 +761,9 @@ static int stv090x_write_regs(struct stv090x_state *state, unsigned int reg, u8 static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data) { - return stv090x_write_regs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv090x_write_regs(state, reg, &tmp, 1); } static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable) diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c index c611ad210b5c..924f16fee1fb 100644 --- a/drivers/media/dvb-frontends/stv6110x.c +++ b/drivers/media/dvb-frontends/stv6110x.c @@ -97,7 +97,9 @@ static int stv6110x_write_regs(struct stv6110x_state *stv6110x, int start, u8 da static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data) { - return stv6110x_write_regs(stv6110x, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv6110x_write_regs(stv6110x, reg, &tmp, 1); } static int stv6110x_init(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c index f8c271be196c..0d2bef62ff05 100644 --- a/drivers/media/dvb-frontends/zl10039.c +++ b/drivers/media/dvb-frontends/zl10039.c @@ -138,7 +138,9 @@ static inline int zl10039_writereg(struct zl10039_state *state, const enum zl10039_reg_addr reg, const u8 val) { - return zl10039_write(state, reg, &val, 1); + const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return zl10039_write(state, reg, &tmp, 1); } static int zl10039_init(struct dvb_frontend *fe) From b2e7c63cad18341525643716bc62e5ac4ee50485 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 10 Jan 2018 07:20:39 -0500 Subject: [PATCH 453/705] media: ts2020: avoid integer overflows on 32 bit machines commit 81742be14b6a90c9fd0ff6eb4218bdf696ad8e46 upstream. Before this patch, when compiled for arm32, the signal strength were reported as: Lock (0x1f) Signal= 4294908.66dBm C/N= 12.79dB Because of a 32 bit integer overflow. After it, it is properly reported as: Lock (0x1f) Signal= -58.64dBm C/N= 12.79dB Fixes: 0f91c9d6bab9 ("[media] TS2020: Calculate tuner gain correctly") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/ts2020.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c index a9f6bbea6df3..103b9c824f1f 100644 --- a/drivers/media/dvb-frontends/ts2020.c +++ b/drivers/media/dvb-frontends/ts2020.c @@ -369,7 +369,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain2 = clamp_t(long, gain2, 0, 13); v_agc = clamp_t(long, v_agc, 400, 1100); - *_gain = -(gain1 * 2330 + + *_gain = -((__s64)gain1 * 2330 + gain2 * 3500 + v_agc * 24 / 10 * 10 + 10000); @@ -387,7 +387,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain3 = clamp_t(long, gain3, 0, 6); v_agc = clamp_t(long, v_agc, 600, 1600); - *_gain = -(gain1 * 2650 + + *_gain = -((__s64)gain1 * 2650 + gain2 * 3380 + gain3 * 2850 + v_agc * 176 / 100 * 10 - From 1666d38f4ed6cbf2bacaad21f04d77ca64c4808a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 24 Jan 2018 06:01:57 -0500 Subject: [PATCH 454/705] media: cxusb, dib0700: ignore XC2028_I2C_FLUSH commit 9893b905e743ded332575ca04486bd586c0772f7 upstream. The XC2028_I2C_FLUSH only needs to be implemented on a few devices. Others can safely ignore it. That prevents filling the dmesg with lots of messages like: dib0700: stk7700ph_xc3028_callback: unknown command 2, arg 0 Fixes: 4d37ece757a8 ("[media] tuner/xc2028: Add I2C flush callback") Reported-by: Enrico Mioso Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/cxusb.c | 2 ++ drivers/media/usb/dvb-usb/dib0700_devices.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index 9fd43a37154c..b20f03d86e00 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -820,6 +820,8 @@ static int dvico_bluebird_xc2028_callback(void *ptr, int component, case XC2028_RESET_CLK: deb_info("%s: XC2028_RESET_CLK %d\n", __func__, arg); break; + case XC2028_I2C_FLUSH: + break; default: deb_info("%s: unknown command %d, arg %d\n", __func__, command, arg); diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index caa55402052e..2868766893c8 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -431,6 +431,7 @@ static int stk7700ph_xc3028_callback(void *ptr, int component, state->dib7000p_ops.set_gpio(adap->fe_adap[0].fe, 8, 0, 1); break; case XC2028_RESET_CLK: + case XC2028_I2C_FLUSH: break; default: err("%s: unknown command %d, arg %d\n", __func__, From da3b224658d3d8aa8ec7877fb1fe808b4b4da029 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2018 15:37:13 -0800 Subject: [PATCH 455/705] fs/proc/kcore.c: use probe_kernel_read() instead of memcpy() commit d0290bc20d4739b7a900ae37eb5d4cc3be2b393f upstream. Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") added a bounce buffer to avoid hardened usercopy checks. Copying to the bounce buffer was implemented with a simple memcpy() assuming that it is always valid to read from kernel memory iff the kern_addr_valid() check passed. A simple, but pointless, test case like "dd if=/proc/kcore of=/dev/null" now can easily crash the kernel, since the former execption handling on invalid kernel addresses now doesn't work anymore. Also adding a kern_addr_valid() implementation wouldn't help here. Most architectures simply return 1 here, while a couple implemented a page table walk to figure out if something is mapped at the address in question. With DEBUG_PAGEALLOC active mappings are established and removed all the time, so that relying on the result of kern_addr_valid() before executing the memcpy() also doesn't work. Therefore simply use probe_kernel_read() to copy to the bounce buffer. This also allows to simplify read_kcore(). At least on s390 this fixes the observed crashes and doesn't introduce warnings that were removed with df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data"), even though the generic probe_kernel_read() implementation uses uaccess functions. While looking into this I'm also wondering if kern_addr_valid() could be completely removed...(?) Link: http://lkml.kernel.org/r/20171202132739.99971-1-heiko.carstens@de.ibm.com Fixes: df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Heiko Carstens Acked-by: Kees Cook Cc: Jiri Olsa Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/kcore.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 5c89a07e3d7f..df7e07986ead 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -507,23 +507,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { From 33a4459bdef12df44f31abbf645489a39fcff321 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 6 Feb 2018 15:37:55 -0800 Subject: [PATCH 456/705] kernel/async.c: revert "async: simplify lowest_in_progress()" commit 4f7e988e63e336827f4150de48163bed05d653bd upstream. This reverts commit 92266d6ef60c ("async: simplify lowest_in_progress()") which was simply wrong: In the case where domain is NULL, we now use the wrong offsetof() in the list_first_entry macro, so we don't actually fetch the ->cookie value, but rather the eight bytes located sizeof(struct list_head) further into the struct async_entry. On 64 bit, that's the data member, while on 32 bit, that's a u64 built from func and data in some order. I think the bug happens to be harmless in practice: It obviously only affects callers which pass a NULL domain, and AFAICT the only such caller is async_synchronize_full() -> async_synchronize_full_domain(NULL) -> async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, NULL) and the ASYNC_COOKIE_MAX means that in practice we end up waiting for the async_global_pending list to be empty - but it would break if somebody happened to pass (void*)-1 as the data element to async_schedule, and of course also if somebody ever does a async_synchronize_cookie_domain(, NULL) with a "finite" cookie value. Maybe the "harmless in practice" means this isn't -stable material. But I'm not completely confident my quick git grep'ing is enough, and there might be affected code in one of the earlier kernels that has since been removed, so I'll leave the decision to the stable guys. Link: http://lkml.kernel.org/r/20171128104938.3921-1-linux@rasmusvillemoes.dk Fixes: 92266d6ef60c "async: simplify lowest_in_progress()" Signed-off-by: Rasmus Villemoes Acked-by: Tejun Heo Cc: Arjan van de Ven Cc: Adam Wallis Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/async.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index d2edd6efec56..d84d4860992e 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -84,20 +84,24 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { - struct list_head *pending; + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); + } - if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (first) + ret = first->cookie; spin_unlock_irqrestore(&async_lock, flags); return ret; From 91cebf98cd940099a25a351558e52003f53a7e25 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:40:24 -0800 Subject: [PATCH 457/705] kernel/relay.c: revert "kernel/relay.c: fix potential memory leak" commit a1be1f3931bfe0a42b46fef77a04593c2b136e7f upstream. This reverts commit ba62bafe942b ("kernel/relay.c: fix potential memory leak"). This commit introduced a double free bug, because 'chan' is already freed by the line: kref_put(&chan->kref, relay_destroy_channel); This bug was found by syzkaller, using the BLKTRACESETUP ioctl. Link: http://lkml.kernel.org/r/20180127004759.101823-1-ebiggers3@gmail.com Fixes: ba62bafe942b ("kernel/relay.c: fix potential memory leak") Signed-off-by: Eric Biggers Reported-by: syzbot Reviewed-by: Andrew Morton Cc: Zhouyi Zhou Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/relay.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 8f18d314a96a..2603e04f55f9 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -611,7 +611,6 @@ free_bufs: kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); - kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); From a705c24b5d5012be1165f0bc43997babd9ed2614 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:53 -0800 Subject: [PATCH 458/705] pipe: actually allow root to exceed the pipe buffer limits commit 85c2dd5473b2718b4b63e74bfeb1ca876868e11f upstream. pipe-user-pages-hard and pipe-user-pages-soft are only supposed to apply to unprivileged users, as documented in both Documentation/sysctl/fs.txt and the pipe(7) man page. However, the capabilities are actually only checked when increasing a pipe's size using F_SETPIPE_SZ, not when creating a new pipe. Therefore, if pipe-user-pages-hard has been set, the root user can run into it and be unable to create pipes. Similarly, if pipe-user-pages-soft has been set, the root user can run into it and have their pipes limited to 1 page each. Fix this by allowing the privileged override in both cases. Link: http://lkml.kernel.org/r/20180111052902.14409-4-ebiggers3@gmail.com Fixes: 759c01142a5d ("pipe: limit the per-user amount of pages allocated in pipes") Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 9faecf1b4a27..54e803f89743 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -617,6 +617,11 @@ static bool too_many_pipe_buffers_hard(unsigned long user_bufs) return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; } +static bool is_unprivileged_user(void) +{ + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); +} + struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; @@ -633,12 +638,12 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs)) { + if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs)) + if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -1069,7 +1074,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (nr_pages > pipe->buffers && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } From 71baf27d8c2b3db340e07412756093474be90883 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:56 -0800 Subject: [PATCH 459/705] pipe: fix off-by-one error when checking buffer limits commit 9903a91c763ecdae333a04a9d89d79d2b8966503 upstream. With pipe-user-pages-hard set to 'N', users were actually only allowed up to 'N - 1' buffers; and likewise for pipe-user-pages-soft. Fix this to allow up to 'N' buffers, as would be expected. Link: http://lkml.kernel.org/r/20180111052902.14409-5-ebiggers3@gmail.com Fixes: b0b91d18e2e9 ("pipe: fix limit checking in pipe_set_size()") Signed-off-by: Eric Biggers Acked-by: Willy Tarreau Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 54e803f89743..34345535f63d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -609,12 +609,12 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; + return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; + return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard; } static bool is_unprivileged_user(void) From df9658e806052cb9f2b0f9bbfb2aaac5bc9e9993 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 Jan 2018 21:05:55 +0100 Subject: [PATCH 460/705] HID: quirks: Fix keyboard + touchpad on Toshiba Click Mini not working commit edfc3722cfef4217c7fe92b272cbe0288ba1ff57 upstream. The Toshiba Click Mini uses an i2c attached keyboard/touchpad combo (single i2c_hid device for both) which has a vid:pid of 04F3:0401, which is also used by a bunch of Elan touchpads which are handled by the drivers/input/mouse/elan_i2c driver, but that driver deals with pure touchpads and does not work for a combo device such as the one on the Toshiba Click Mini. The combo on the Mini has an ACPI id of ELAN0800, which is not claimed by the elan_i2c driver, so check for that and if it is found do not ignore the device. This fixes the keyboard/touchpad combo on the Mini not working (although with the touchpad in mouse emulation mode). Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e32862ca5223..03cac5731afc 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2365,7 +2365,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0401) }, { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) }, @@ -2635,6 +2634,17 @@ bool hid_ignore(struct hid_device *hdev) strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0) return true; break; + case USB_VENDOR_ID_ELAN: + /* + * Many Elan devices have a product id of 0x0401 and are handled + * by the elan_i2c input driver. But the ACPI HID ELAN0800 dev + * is not (and cannot be) handled by that driver -> + * Ignore all 0x0401 devs except for the ELAN0800 dev. + */ + if (hdev->product == 0x0401 && + strncmp(hdev->name, "ELAN0800", 8) != 0) + return true; + break; } if (hdev->type == HID_TYPE_USBMOUSE && From 6913d1b190b99855b740f648ea70cede43e52eff Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 29 Nov 2017 20:29:07 +0100 Subject: [PATCH 461/705] Bluetooth: btsdio: Do not bind to non-removable BCM43341 commit b4cdaba274247c9c841c6a682c08fa91fb3aa549 upstream. BCM43341 devices soldered onto the PCB (non-removable) always (AFAICT) use an UART connection for bluetooth. But they also advertise btsdio support on their 3th sdio function, this causes 2 problems: 1) A non functioning BT HCI getting registered 2) Since the btsdio driver does not have suspend/resume callbacks, mmc_sdio_pre_suspend will return -ENOSYS, causing mmc_pm_notify() to react as if the SDIO-card is removed and since the slot is marked as non-removable it will never get detected as inserted again. Which results in wifi no longer working after a suspend/resume. This commit fixes both by making btsdio ignore BCM43341 devices when connected to a slot which is marked non-removable. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btsdio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 1cb958e199eb..94e914a33a99 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -291,6 +292,14 @@ static int btsdio_probe(struct sdio_func *func, tuple = tuple->next; } + /* BCM43341 devices soldered onto the PCB (non-removable) use an + * uart connection for bluetooth, ignore the BT SDIO interface. + */ + if (func->vendor == SDIO_VENDOR_ID_BROADCOM && + func->device == SDIO_DEVICE_ID_BROADCOM_43341 && + !mmc_card_is_removable(func->card->host)) + return -ENODEV; + data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; From 84bf682f53422a08086dd3f1b0db1dbda46fead3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Dec 2017 19:00:07 +0800 Subject: [PATCH 462/705] Revert "Bluetooth: btusb: fix QCA Rome suspend/resume" commit 7d06d5895c159f64c46560dc258e553ad8670fe0 upstream. This reverts commit fd865802c66bc451dc515ed89360f84376ce1a56. This commit causes a regression on some QCA ROME chips. The USB device reset happens in btusb_open(), hence firmware loading gets interrupted. Furthermore, this commit stops working after commit ("a0085f2510e8976614ad8f766b209448b385492f Bluetooth: btusb: driver to enable the usb-wakeup feature"). Reset-resume quirk only gets enabled in btusb_suspend() when it's not a wakeup source. If we really want to reset the USB device, we need to do it before btusb_open(). Let's handle it in drivers/usb/core/quirks.c. Cc: Leif Liddy Cc: Matthias Kaehlcke Cc: Brian Norris Cc: Daniel Drake Signed-off-by: Kai-Heng Feng Reviewed-by: Brian Norris Tested-by: Brian Norris Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 693028659ccc..74e677ac8e37 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2925,12 +2925,6 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; - - /* QCA Rome devices lose their updated firmware over suspend, - * but the USB hub doesn't notice any status change. - * Explicitly request a device reset on resume. - */ - set_bit(BTUSB_RESET_RESUME, &data->flags); } #ifdef CONFIG_BT_HCIBTUSB_RTL From 5795b076bd7f98fc80ce088f8f79bbe02e2553ca Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Jan 2018 10:44:16 +0100 Subject: [PATCH 463/705] Bluetooth: btusb: Restore QCA Rome suspend/resume fix with a "rewritten" version commit 61f5acea8737d9b717fcc22bb6679924f3c82b98 upstream. Commit 7d06d5895c15 ("Revert "Bluetooth: btusb: fix QCA...suspend/resume"") removed the setting of the BTUSB_RESET_RESUME quirk for QCA Rome devices, instead favoring adding USB_QUIRK_RESET_RESUME quirks in usb/core/quirks.c. This was done because the DIY BTUSB_RESET_RESUME reset-resume handling has several issues (see the original commit message). An added advantage of moving over to the USB-core reset-resume handling is that it also disables autosuspend for these devices, which is similarly broken on these. But there are 2 issues with this approach: 1) It leaves the broken DIY BTUSB_RESET_RESUME code in place for Realtek devices. 2) Sofar only 2 of the 10 QCA devices known to the btusb code have been added to usb/core/quirks.c and if we fix the Realtek case the same way we need to add an additional 14 entries. So in essence we need to duplicate a large part of the usb_device_id table in btusb.c in usb/core/quirks.c and manually keep them in sync. This commit instead restores setting a reset-resume quirk for QCA devices in the btusb.c code, avoiding the duplicate usb_device_id table problem. This commit avoids the problems with the original DIY BTUSB_RESET_RESUME code by simply setting the USB_QUIRK_RESET_RESUME quirk directly on the usb_device. This commit also moves the BTUSB_REALTEK case over to directly setting the USB_QUIRK_RESET_RESUME on the usb_device and removes the now unused BTUSB_RESET_RESUME code. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Fixes: 7d06d5895c15 ("Revert "Bluetooth: btusb: fix QCA...suspend/resume"") Cc: Leif Liddy Cc: Matthias Kaehlcke Cc: Brian Norris Cc: Daniel Drake Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 74e677ac8e37..3257647d4f74 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -369,8 +370,8 @@ static const struct usb_device_id blacklist_table[] = { #define BTUSB_FIRMWARE_LOADED 7 #define BTUSB_FIRMWARE_FAILED 8 #define BTUSB_BOOTING 9 -#define BTUSB_RESET_RESUME 10 -#define BTUSB_DIAG_RUNNING 11 +#define BTUSB_DIAG_RUNNING 10 +#define BTUSB_OOB_WAKE_ENABLED 11 struct btusb_data { struct hci_dev *hdev; @@ -2925,6 +2926,12 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; + + /* QCA Rome devices lose their updated firmware over suspend, + * but the USB hub doesn't notice any status change. + * explicitly request a device reset on resume. + */ + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #ifdef CONFIG_BT_HCIBTUSB_RTL @@ -2935,7 +2942,7 @@ static int btusb_probe(struct usb_interface *intf, * but the USB hub doesn't notice any status change. * Explicitly request a device reset on resume. */ - set_bit(BTUSB_RESET_RESUME, &data->flags); + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #endif @@ -3092,14 +3099,6 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) btusb_stop_traffic(data); usb_kill_anchored_urbs(&data->tx_anchor); - /* Optionally request a device reset on resume, but only when - * wakeups are disabled. If wakeups are enabled we assume the - * device will stay powered up throughout suspend. - */ - if (test_bit(BTUSB_RESET_RESUME, &data->flags) && - !device_may_wakeup(&data->udev->dev)) - data->udev->reset_resume = 1; - return 0; } From 498b8b7453a345460fa22a13f5011d43d311b60a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Aug 2017 04:16:47 -0500 Subject: [PATCH 464/705] signal/openrisc: Fix do_unaligned_access to send the proper signal commit 500d58300571b6602341b041f97c082a461ef994 upstream. While reviewing the signal sending on openrisc the do_unaligned_access function stood out because it is obviously wrong. A comment about an si_code set above when actually si_code is never set. Leading to a random si_code being sent to userspace in the event of an unaligned access. Looking further SIGBUS BUS_ADRALN is the proper pair of signal and si_code to send for an unaligned access. That is what other architectures do and what is required by posix. Given that do_unaligned_access is broken in a way that no one can be relying on it on openrisc fix the code to just do the right thing. Fixes: 769a8a96229e ("OpenRISC: Traps") Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: Arnd Bergmann Cc: openrisc@lists.librecores.org Acked-by: Stafford Horne Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/kernel/traps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 3d3f6062f49c..605a284922fb 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -302,12 +302,12 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) siginfo_t info; if (user_mode(regs)) { - /* Send a SIGSEGV */ - info.si_signo = SIGSEGV; + /* Send a SIGBUS */ + info.si_signo = SIGBUS; info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, current); + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); From 21f94109d0f17d2fb775ae0e51ef98feda2b9a47 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Jul 2017 17:30:30 -0500 Subject: [PATCH 465/705] signal/sh: Ensure si_signo is initialized in do_divide_error commit 0e88bb002a9b2ee8cc3cc9478ce2dc126f849696 upstream. Set si_signo. Cc: Yoshinori Sato Cc: Rich Felker Cc: Paul Mundt Cc: linux-sh@vger.kernel.org Fixes: 0983b31849bb ("sh: Wire up division and address error exceptions on SH-2A.") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index ff639342a8be..c5b997757988 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -607,7 +607,8 @@ asmlinkage void do_divide_error(unsigned long r4) break; } - force_sig_info(SIGFPE, &info, current); + info.si_signo = SIGFPE; + force_sig_info(info.si_signo, &info, current); } #endif From 68d18e90eeec864b38e1313e0d6d10b4e1e5a1fc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 14:01:34 -0500 Subject: [PATCH 466/705] alpha: fix crash if pthread_create races with signal delivery commit 21ffceda1c8b3807615c40d440d7815e0c85d366 upstream. On alpha, a process will crash if it attempts to start a thread and a signal is delivered at the same time. The crash can be reproduced with this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html The reason for the crash is this: * we call the clone syscall * we go to the function copy_process * copy process calls copy_thread_tls, it is a wrapper around copy_thread * copy_thread sets the tls pointer: childti->pcb.unique = regs->r20 * copy_thread sets regs->r20 to zero * we go back to copy_process * copy process checks "if (signal_pending(current))" and returns -ERESTARTNOINTR * the clone syscall is restarted, but this time, regs->r20 is zero, so the new thread is created with zero tls pointer * the new thread crashes in start_thread when attempting to access tls The comment in the code says that setting the register r20 is some compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug by zeroing regs->r20 only if CLONE_SETTLS is not set. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index b483156698d5..60c17b9bf04d 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp, application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork; From 7d22d92ca6c9cc72629eb00b576de4f0f0dc8f74 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 13:59:54 -0500 Subject: [PATCH 467/705] alpha: fix reboot on Avanti platform commit 55fc633c41a08ce9244ff5f528f420b16b1e04d6 upstream. We need to define NEED_SRM_SAVE_RESTORE on the Avanti, otherwise we get machine check exception when attempting to reboot the machine. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/pci_impl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 2b0ac429f5eb..412bb3c24f36 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -143,7 +143,8 @@ struct pci_iommu_arena }; #if defined(CONFIG_ALPHA_SRM) && \ - (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA)) + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \ + defined(CONFIG_ALPHA_AVANTI)) # define NEED_SRM_SAVE_RESTORE #else # undef NEED_SRM_SAVE_RESTORE From 71611b37cca4982da64d1807ab54749826628805 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 14:00:32 -0500 Subject: [PATCH 468/705] alpha: fix formating of stack content commit 4b01abdb32fc36abe877503bfbd33019159fad71 upstream. Since version 4.9, the kernel automatically breaks printk calls into multiple newlines unless pr_cont is used. Fix the alpha stacktrace code, so that it prints stack trace in four columns, as it was initially intended. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/traps.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 74aceead06e9..32ba92cdf5f6 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -158,11 +158,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) for(i=0; i < kstack_depth_to_print; i++) { if (((long) stack & (THREAD_SIZE-1)) == 0) break; - if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + if ((i % 4) == 0) { + if (i) + pr_cont("\n"); + printk(" "); + } else { + pr_cont(" "); + } + pr_cont("%016lx", *stack++); } - printk("\n"); + pr_cont("\n"); dik_show_trace(sp); } From 2d4e295284a9614726cf02b3da1bb2591ec60770 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 5 Jan 2018 14:27:58 -0800 Subject: [PATCH 469/705] xtensa: fix futex_atomic_cmpxchg_inatomic commit ca47480921587ae30417dd234a9f79af188e3666 upstream. Return 0 if the operation was successful, not the userspace memory value. Check that userspace value equals passed oldval, not itself. Don't update *uval if the value wasn't read from userspace memory. This fixes process hang due to infinite loop in futex_lock_pi. It also fixes a bunch of glibc tests nptl/tst-mutexpi*. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/futex.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index b39531babec0..72bfc1cbc2b5 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h @@ -109,7 +109,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; - u32 prev; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; @@ -120,26 +119,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __asm__ __volatile__ ( " # futex_atomic_cmpxchg_inatomic\n" - "1: l32i %1, %3, 0\n" - " mov %0, %5\n" - " wsr %1, scompare1\n" - "2: s32c1i %0, %3, 0\n" - "3:\n" + " wsr %5, scompare1\n" + "1: s32c1i %1, %4, 0\n" + " s32i %1, %6, 0\n" + "2:\n" " .section .fixup,\"ax\"\n" " .align 4\n" - "4: .long 3b\n" - "5: l32r %1, 4b\n" - " movi %0, %6\n" + "3: .long 2b\n" + "4: l32r %1, 3b\n" + " movi %0, %7\n" " jx %1\n" " .previous\n" " .section __ex_table,\"a\"\n" - " .long 1b,5b,2b,5b\n" + " .long 1b,4b\n" " .previous\n" - : "+r" (ret), "=&r" (prev), "+m" (*uaddr) - : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT) + : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval) + : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT) : "memory"); - *uval = prev; return ret; } From 10ddc77ffb92152fe97341b66fcb2d735c2955be Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 13 Nov 2017 16:12:06 +0000 Subject: [PATCH 470/705] EDAC, octeon: Fix an uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 544e92581a2ac44607d7cc602c6b54d18656f56d upstream. Fix an uninitialized variable warning in the Octeon EDAC driver, as seen in MIPS cavium_octeon_defconfig builds since v4.14 with Codescape GNU Tools 2016.05-03: drivers/edac/octeon_edac-lmc.c In function ‘octeon_lmc_edac_poll_o2’: drivers/edac/octeon_edac-lmc.c:87:24: warning: ‘((long unsigned int*)&int_reg)[1]’ may \ be used uninitialized in this function [-Wmaybe-uninitialized] if (int_reg.s.sec_err || int_reg.s.ded_err) { ^ Iinitialise the whole int_reg variable to zero before the conditional assignments in the error injection case. Signed-off-by: James Hogan Acked-by: David Daney Cc: linux-edac Cc: linux-mips@linux-mips.org Fixes: 1bc021e81565 ("EDAC: Octeon: Add error injection support") Link: http://lkml.kernel.org/r/20171113161206.20990-1-james.hogan@mips.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/octeon_edac-lmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index cda6dab5067a..6b65a102b49d 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -79,6 +79,7 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) if (!pvt->inject) int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); else { + int_reg.u64 = 0; if (pvt->error_type == 1) int_reg.s.sec_err = 1; if (pvt->error_type == 2) From 86d408d10efd1d6161f456ddba59cb2a0fb5f763 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 29 Nov 2017 16:25:44 +0300 Subject: [PATCH 471/705] pinctrl: intel: Initialize GPIO properly when used through irqchip commit f5a26acf0162477af6ee4c11b4fb9cffe5d3e257 upstream. When a GPIO is requested using gpiod_get_* APIs the intel pinctrl driver switches the pin to GPIO mode and makes sure interrupts are routed to the GPIO hardware instead of IOAPIC. However, if the GPIO is used directly through irqchip, as is the case with many I2C-HID devices where I2C core automatically configures interrupt for the device, the pin is not initialized as GPIO. Instead we rely that the BIOS configures the pin accordingly which seems not to be the case at least in Asus X540NA SKU3 with Focaltech touchpad. When the pin is not properly configured it might result weird behaviour like interrupts suddenly stop firing completely and the touchpad stops responding to user input. Fix this by properly initializing the pin to GPIO mode also when it is used directly through irqchip. Fixes: 7981c0015af2 ("pinctrl: intel: Add Intel Sunrisepoint pin controller and GPIO support") Reported-by: Daniel Drake Reported-and-tested-by: Chris Chiu Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-intel.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index b40a074822cf..df63b7d997e8 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -368,6 +368,18 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) +{ + u32 value; + + /* Put the pad into GPIO mode */ + value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; + /* Disable SCI/SMI/NMI generation */ + value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); + value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); + writel(value, padcfg0); +} + static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) @@ -375,7 +387,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); void __iomem *padcfg0; unsigned long flags; - u32 value; raw_spin_lock_irqsave(&pctrl->lock, flags); @@ -385,13 +396,7 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, } padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); - /* Put the pad into GPIO mode */ - value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; - /* Disable SCI/SMI/NMI generation */ - value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); - value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); - writel(value, padcfg0); - + intel_gpio_set_gpio_mode(padcfg0); /* Disable TX buffer and enable RX (this will be input) */ __intel_gpio_set_direction(padcfg0, true); @@ -770,6 +775,8 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned type) raw_spin_lock_irqsave(&pctrl->lock, flags); + intel_gpio_set_gpio_mode(reg); + value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); From 944723bf84d3ff53d09f949d9a4a659c26b53ccf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Jan 2018 11:39:47 -0800 Subject: [PATCH 472/705] pktcdvd: Fix pkt_setup_dev() error path commit 5a0ec388ef0f6e33841aeb810d7fa23f049ec4cd upstream. Commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") modified add_disk() and disk_release() but did not update any of the error paths that trigger a put_disk() call after disk->queue has been assigned. That introduced the following behavior in the pktcdvd driver if pkt_new_dev() fails: Kernel BUG at 00000000e98fd882 [verbose debug info unavailable] Since disk_release() calls blk_put_queue() anyway if disk->queue != NULL, fix this by removing the blk_cleanup_queue() call from the pkt_setup_dev() error path. Fixes: commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") Signed-off-by: Bart Van Assche Cc: Tejun Heo Cc: Maciej S. Szmigiero Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90fa4ac149db..7e4ef0502796 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2779,7 +2779,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) - goto out_new_dev; + goto out_mem2; /* inherit events of the host device */ disk->events = pd->bdev->bd_disk->events; @@ -2797,8 +2797,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) mutex_unlock(&ctl_mutex); return 0; -out_new_dev: - blk_cleanup_queue(disk->queue); out_mem2: put_disk(disk); out_mem: From 1bb09d05a41cbfcdc5cfe6ba953780e9bc63abcc Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:50 +0100 Subject: [PATCH 473/705] clocksource/drivers/stm32: Fix kernel panic with multiple timers commit e0aeca3d8cbaea514eb98df1149faa918f9ec42d upstream. The current code hides a couple of bugs: - The global variable 'clock_event_ddata' is overwritten each time the init function is invoked. This is fixed with a kmemdup() instead of assigning the global variable. That prevents a memory corruption when several timers are defined in the DT. - The clockevent's event_handler is NULL if the time framework does not select the clockevent when registering it, this is fine but the init code generates in any case an interrupt leading to dereference this NULL pointer. The stm32 timer works with shadow registers, a mechanism to cache the registers. When a change is done in one buffered register, we need to artificially generate an event to force the timer to copy the content of the register to the shadowed register. The auto-reload register (ARR) is one of the shadowed register as well as the prescaler register (PSC), so in order to force the copy, we issue an event which in turn leads to an interrupt and the NULL dereference. This is fixed by inverting two lines where we clear the status register before enabling the update event interrupt. As this kernel crash is resulting from the combination of these two bugs, the fixes are grouped into a single patch. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-11-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-stm32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 1b2574c4fb97..b167cc634fae 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,7 @@ #include #include #include +#include #define TIM_CR1 0x00 #define TIM_DIER 0x0c @@ -106,6 +107,10 @@ static int __init stm32_clockevent_init(struct device_node *np) unsigned long rate, max_delta; int irq, ret, bits, prescaler = 1; + data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + clk = of_clk_get(np, 0); if (IS_ERR(clk)) { ret = PTR_ERR(clk); @@ -156,8 +161,8 @@ static int __init stm32_clockevent_init(struct device_node *np) writel_relaxed(prescaler - 1, data->base + TIM_PSC); writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); @@ -184,6 +189,7 @@ err_iomap: err_clk_enable: clk_put(clk); err_clk_get: + kfree(data); return ret; } From 3f8130127c0c2439559050ff0ee2ba85af5d8693 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 6 Feb 2018 15:40:38 -0800 Subject: [PATCH 474/705] lib/ubsan.c: s/missaligned/misaligned/ commit b8fe1120b4ba342b4f156d24e952d6e686b20298 upstream. A vist from the spelling fairy. Cc: David Laight Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf..1e2328fa002d 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -281,7 +281,7 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data *data, unsigned long ptr) { unsigned long flags; @@ -322,7 +322,7 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } From 3c83fe52b5c1559ea8c1da23ad2c600acc9c6ab3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:42 -0800 Subject: [PATCH 475/705] lib/ubsan: add type mismatch handler for new GCC/Clang commit 42440c1f9911b4b7b8ba3dc4e90c1197bc561211 upstream. UBSAN=y fails to build with new GCC/clang: arch/x86/kernel/head64.o: In function `sanitize_boot_params': arch/x86/include/asm/bootparam_utils.h:37: undefined reference to `__ubsan_handle_type_mismatch_v1' because Clang and GCC 8 slightly changed ABI for 'type mismatch' errors. Compiler now uses new __ubsan_handle_type_mismatch_v1() function with slightly modified 'struct type_mismatch_data'. Let's add new 'struct type_mismatch_data_common' which is independent from compiler's layout of 'struct type_mismatch_data'. And make __ubsan_handle_type_mismatch[_v1]() functions transform compiler-dependent type mismatch data to our internal representation. This way, we can support both old and new compilers with minimal amount of change. Link: http://lkml.kernel.org/r/20180119152853.16806-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Sodagudi Prasad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 48 ++++++++++++++++++++++++++++++++++++++---------- lib/ubsan.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 1e2328fa002d..50d1d5c25deb 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -265,14 +265,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +281,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_misaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +299,15 @@ static void handle_misaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,7 +315,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { @@ -326,8 +326,36 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, else handle_object_size_mismatch(data, ptr); } + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} EXPORT_SYMBOL(__ubsan_handle_type_mismatch); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ + + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); + void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) { unsigned long flags; diff --git a/lib/ubsan.h b/lib/ubsan.h index b2d18d4a53f5..d8b8085e5dac 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -36,6 +36,20 @@ struct type_mismatch_data { unsigned char type_check_kind; }; +struct type_mismatch_data_v1 { + struct source_location location; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + struct nonnull_arg_data { struct source_location location; struct source_location attr_location; From 8fe7ceaf8a4ef194793365339e3e84f7fabdca97 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 5 Dec 2017 09:29:19 +0200 Subject: [PATCH 476/705] btrfs: Handle btrfs_set_extent_delalloc failure in fixup worker commit f3038ee3a3f1017a1cbe9907e31fa12d366c5dcb upstream. This function was introduced by 247e743cbe6e ("Btrfs: Use async helpers to deal with pages that have been improperly dirtied") and it didn't do any error handling then. This function might very well fail in ENOMEM situation, yet it's not handled, this could lead to inconsistent state. So let's handle the failure by setting the mapping error bit. Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 894d56361ea9..a8a1fb40e258 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2063,8 +2063,15 @@ again: goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state, 0); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + ClearPageChecked(page); set_page_dirty(page); out: From 3169a7c06e913d3575713753f0807554b1165996 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 29 Nov 2017 19:51:37 +0200 Subject: [PATCH 477/705] drm/i915: Avoid PPS HW/SW state mismatch due to rounding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5643205c6340b565a3be0fe0e7305dc4aa551c74 upstream. We store a SW state of the t11_t12 timing in 100usec units but have to program it in 100msec as required by HW. The rounding used during programming means there will be a mismatch between the SW and HW states of this value triggering a "PPS state mismatch" error. Avoid this by storing the already rounded-up value in the SW state. Note that we still calculate panel_power_cycle_delay with the finer 100usec granularity to avoid any needless waits using that version of the delay. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103903 Cc: joks Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171129175137.2889-1-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7fdc42e5aac8..74163a928cba 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5063,6 +5063,12 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, */ final->t8 = 1; final->t9 = 1; + + /* + * HW has only a 100msec granularity for t11_t12 so round it up + * accordingly. + */ + final->t11_t12 = roundup(final->t11_t12, 100 * 10); } static void From 623c28ee02b36a9f45780be0ded6d13ad74e2d0e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 19 Jan 2018 10:06:03 +0100 Subject: [PATCH 478/705] ACPI: sbshc: remove raw pointer from printk() message commit 43cdd1b716b26f6af16da4e145b6578f98798bf6 upstream. There's no need to be printing a raw kernel pointer to the kernel log at every boot. So just remove it, and change the whole message to use the correct dev_info() call at the same time. Reported-by: Wang Qize Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sbshc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 2fa8304171e0..7a3431018e0a 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -275,8 +275,8 @@ static int acpi_smbus_hc_add(struct acpi_device *device) device->driver_data = hc; acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc); - printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n", - hc->ec, hc->offset, hc->query_bit); + dev_info(&device->dev, "SBS HC: offset = 0x%0x, query_bit = 0x%0x\n", + hc->offset, hc->query_bit); return 0; } From a468a3749bb5630b8744fe2c1e41ed86f2a27f79 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 2 Feb 2018 14:00:36 -0700 Subject: [PATCH 479/705] acpi, nfit: fix register dimm error handling commit 23fbd7c70aec7600e3227eb24259fc55bf6e4881 upstream. A NULL pointer reference kernel bug was observed when acpi_nfit_add_dimm() called in acpi_nfit_register_dimms() failed. This error path does not set nfit_mem->nvdimm, but the 2nd list_for_each_entry() loop in the function assumes it's always set. Add a check to nfit_mem->nvdimm. Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support") Signed-off-by: Toshi Kani Cc: "Rafael J. Wysocki" Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fe03d00de22b..b1815b20a99c 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1535,6 +1535,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) struct kernfs_node *nfit_kernfs; nvdimm = nfit_mem->nvdimm; + if (!nvdimm) + continue; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, From 38e3bc59e0dd2885b223dc5466304af3eba1dd4b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Nov 2017 09:39:46 +0200 Subject: [PATCH 480/705] ovl: fix failure to fsync lower dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d796e77f1dd541fe34481af2eee6454688d13982 upstream. As a writable mount, it is not expected for overlayfs to return EINVAL/EROFS for fsync, even if dir/file is not changed. This commit fixes the case of fsync of directory, which is easier to address, because overlayfs already implements fsync file operation for directories. The problem reported by Raphael is that new PostgreSQL 10.0 with a database in overlayfs where lower layer in squashfs fails to start. The failure is due to fsync error, when PostgreSQL does fsync on all existing db directories on startup and a specific directory exists lower layer with no changes. Reported-by: Raphael Hertzog Signed-off-by: Amir Goldstein Tested-by: Raphaël Hertzog Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/readdir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index f241b4ee3d8a..a1be6baabca3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -434,10 +434,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = lockless_dereference(od->upperfile); From df113487f844b9ba36cd87ed6c68d006b3f2e519 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Aug 2017 05:02:38 -0500 Subject: [PATCH 481/705] mn10300/misalignment: Use SIGSEGV SEGV_MAPERR to report a failed user copy commit 6ac1dc736b323011a55ecd1fc5897c24c4f77cbd upstream. Setting si_code to 0 is the same a setting si_code to SI_USER which is definitely not correct. With si_code set to SI_USER si_pid and si_uid will be copied to userspace instead of si_addr. Which is very wrong. So fix this by using a sensible si_code (SEGV_MAPERR) for this failure. Fixes: b920de1b77b7 ("mn10300: add the MN10300/AM33 architecture to the kernel") Cc: David Howells Cc: Masakazu Urade Cc: Koichi Yasutake Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/mn10300/mm/misalignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index b9920b1edd5a..70cef54dc40f 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -437,7 +437,7 @@ transfer_failed: info.si_signo = SIGSEGV; info.si_errno = 0; - info.si_code = 0; + info.si_code = SEGV_MAPERR; info.si_addr = (void *) regs->pc; force_sig_info(SIGSEGV, &info, current); return; From 2de1085e8deb0339effd860c2929848301d20d3d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Feb 2018 22:05:31 -0500 Subject: [PATCH 482/705] ftrace: Remove incorrect setting of glob search field commit 7b6586562708d2b3a04fe49f217ddbadbbbb0546 upstream. __unregister_ftrace_function_probe() will incorrectly parse the glob filter because it resets the search variable that was setup by filter_parse_regex(). Al Viro reported this: After that call of filter_parse_regex() we could have func_g.search not equal to glob only if glob started with '!' or '*'. In the former case we would've buggered off with -EINVAL (not = 1). In the latter we would've set func_g.search equal to glob + 1, calculated the length of that thing in func_g.len and proceeded to reset func_g.search back to glob. Suppose the glob is e.g. *foo*. We end up with func_g.type = MATCH_MIDDLE_ONLY; func_g.len = 3; func_g.search = "*foo"; Feeding that to ftrace_match_record() will not do anything sane - we will be looking for names containing "*foo" (->len is ignored for that one). Link: http://lkml.kernel.org/r/20180127031706.GE13338@ZenIV.linux.org.uk Fixes: 3ba009297149f ("ftrace: Introduce ftrace_glob structure") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Masami Hiramatsu Reported-by: Al Viro Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b8d7189e147..2884fe01cb54 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3911,7 +3911,6 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, func_g.type = filter_parse_regex(glob, strlen(glob), &func_g.search, ¬); func_g.len = strlen(func_g.search); - func_g.search = glob; /* we do not support '!' for function probes */ if (WARN_ON(not)) From 3e598a7089eef1fe04d5b87cc154295302960e62 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 17 Feb 2018 13:21:21 +0100 Subject: [PATCH 483/705] Linux 4.9.82 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4d5753f1c37b..d338530540e0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Roaring Lionus From aed3b970e68235dcf465b18f4c12a7efd09fb177 Mon Sep 17 00:00:00 2001 From: Steffen Weber Date: Tue, 2 Jan 2018 19:24:09 +0100 Subject: [PATCH 484/705] scsi: smartpqi: allow static build ("built-in") commit dc2db1dc5fb9ab3a43b305c2720fee5278dbee2a upstream. If CONFIG_SCSI_SMARTPQI=y then don't build this driver as a module. Signed-off-by: Steffen Weber Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/smartpqi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/Makefile b/drivers/scsi/smartpqi/Makefile index 0f42a225a664..e6b779930230 100644 --- a/drivers/scsi/smartpqi/Makefile +++ b/drivers/scsi/smartpqi/Makefile @@ -1,3 +1,3 @@ ccflags-y += -I. -obj-m += smartpqi.o +obj-$(CONFIG_SCSI_SMARTPQI) += smartpqi.o smartpqi-objs := smartpqi_init.o smartpqi_sis.o smartpqi_sas_transport.o From d7b2a68485787a655a4e7f71def1d08289f644e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Nov 2017 12:09:38 -0500 Subject: [PATCH 485/705] drm/radeon: Add dpm quirk for Jet PRO (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 239b5f64e12b1f09f506c164dff0374924782979 upstream. Fixes stability issues. v2: clamp sclk to 600 Mhz Bug: https://bugs.freedesktop.org/show_bug.cgi?id=103370 Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8bd9e6c371d1..574ab0016a57 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3029,6 +3029,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_sclk = 75000; max_mclk = 80000; } + if ((rdev->pdev->revision == 0xC3) || + (rdev->pdev->device == 0x6665)) { + max_sclk = 60000; + max_mclk = 80000; + } } else if (rdev->family == CHIP_OLAND) { if ((rdev->pdev->revision == 0xC7) || (rdev->pdev->revision == 0x80) || From 9172bbcdef221d76976c8df9caafe8ada07c3389 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 27 Jan 2018 15:28:15 +0100 Subject: [PATCH 486/705] drm/radeon: adjust tested variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3a61b527b4e1f285d21b6e9e623dc45cf8bb391f upstream. Check the variable that was most recently initialized. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression x, y, f, g, e, m; statement S1,S2,S3,S4; @@ x = f(...); if (\(<+...x...+>\&e\)) S1 else S2 ( x = g(...); | m = g(...,&x,...); | y = g(...); *if (e) S3 else S4 ) // Signed-off-by: Julia Lawall Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 0cd0e7bdee55..16239b07ce45 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -995,7 +995,7 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, /* calc dclk divider with current vco freq */ dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, pd_min, pd_even); - if (vclk_div > pd_max) + if (dclk_div > pd_max) break; /* vco is too big, it has to stop */ /* calc score with current vco freq */ From b7dc0f532107ec150ccaf381d926c07f6c3fa30d Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Tue, 2 Aug 2016 11:50:16 +1000 Subject: [PATCH 487/705] rtc-opal: Fix handling of firmware error codes, prevent busy loops commit 5b8b58063029f02da573120ef4dc9079822e3cda upstream. According to the OPAL docs: skiboot-5.2.5/doc/opal-api/opal-rtc-read-3.txt skiboot-5.2.5/doc/opal-api/opal-rtc-write-4.txt OPAL_HARDWARE may be returned from OPAL_RTC_READ or OPAL_RTC_WRITE and this indicates either a transient or permanent error. Prior to this patch, Linux was not dealing with OPAL_HARDWARE being a permanent error particularly well, in that you could end up in a busy loop. This was not too hard to trigger on an AMI BMC based OpenPOWER machine doing a continuous "ipmitool mc reset cold" to the BMC, the result of that being that we'd get stuck in an infinite loop in opal_get_rtc_time(). We now retry a few times before returning the error higher up the stack. Fixes: 16b1d26e77b1 ("rtc/tpo: Driver to support rtc and wakeup on PowerNV platform") Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Stewart Smith Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-opal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index ea20f627dabe..e4324dcf9508 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -58,6 +58,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) { long rc = OPAL_BUSY; + int retries = 10; u32 y_m_d; u64 h_m_s_ms; __be32 __y_m_d; @@ -67,8 +68,11 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); - else + else if (retries-- && (rc == OPAL_HARDWARE + || rc == OPAL_INTERNAL_ERROR)) msleep(10); + else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) + break; } if (rc != OPAL_SUCCESS) @@ -84,6 +88,7 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) { long rc = OPAL_BUSY; + int retries = 10; u32 y_m_d = 0; u64 h_m_s_ms = 0; @@ -92,8 +97,11 @@ static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) rc = opal_rtc_write(y_m_d, h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); - else + else if (retries-- && (rc == OPAL_HARDWARE + || rc == OPAL_INTERNAL_ERROR)) msleep(10); + else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) + break; } return rc == OPAL_SUCCESS ? 0 : -EIO; From 9cb2d0bc289e8edfdd6b5fb9962fd41b4c6e9b16 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Sun, 7 Jan 2018 16:22:35 -0500 Subject: [PATCH 488/705] mbcache: initialize entry->e_referenced in mb_cache_entry_create() commit 3876bbe27d04b848750d5310a37d6b76b593f648 upstream. KMSAN reported use of uninitialized |entry->e_referenced| in a condition in mb_cache_shrink(): ================================================================== BUG: KMSAN: use of uninitialized memory in mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287 CPU: 2 PID: 816 Comm: kswapd1 Not tainted 4.11.0-rc5+ #2877 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287 mb_cache_scan+0x67/0x80 fs/mbcache.c:321 do_shrink_slab mm/vmscan.c:397 [inline] shrink_slab+0xc3d/0x12d0 mm/vmscan.c:500 shrink_node+0x208f/0x2fd0 mm/vmscan.c:2603 kswapd_shrink_node mm/vmscan.c:3172 [inline] balance_pgdat mm/vmscan.c:3289 [inline] kswapd+0x160f/0x2850 mm/vmscan.c:3478 kthread+0x46c/0x5f0 kernel/kthread.c:230 ret_from_fork+0x29/0x40 arch/x86/entry/entry_64.S:430 chained origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline] kmsan_save_stack mm/kmsan/kmsan.c:317 [inline] kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 __msan_store_shadow_origin_1+0xac/0x110 mm/kmsan/kmsan_instr.c:257 mb_cache_entry_create+0x3b3/0xc60 fs/mbcache.c:95 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline] ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36 __vfs_setxattr+0x703/0x790 fs/xattr.c:149 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180 vfs_setxattr fs/xattr.c:223 [inline] setxattr+0x6ae/0x790 fs/xattr.c:449 path_setxattr+0x1eb/0x380 fs/xattr.c:468 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486 entry_SYSCALL_64_fastpath+0x13/0x94 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline] kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 mb_cache_entry_create+0x283/0xc60 fs/mbcache.c:86 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline] ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36 __vfs_setxattr+0x703/0x790 fs/xattr.c:149 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180 vfs_setxattr fs/xattr.c:223 [inline] setxattr+0x6ae/0x790 fs/xattr.c:449 path_setxattr+0x1eb/0x380 fs/xattr.c:468 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486 entry_SYSCALL_64_fastpath+0x13/0x94 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org # v4.6 Signed-off-by: Greg Kroah-Hartman --- fs/mbcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/mbcache.c b/fs/mbcache.c index c5bd19ffa326..27e6bf6f09c6 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -93,6 +93,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, entry->e_key = key; entry->e_block = block; entry->e_reusable = reusable; + entry->e_referenced = 0; head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { From 99a89d8fb506abaaf5f88e99cd29bb288d805c8e Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 10 Jan 2018 00:27:29 -0500 Subject: [PATCH 489/705] jbd2: fix sphinx kernel-doc build warnings commit f69120ce6c024aa634a8fc25787205e42f0ccbe6 upstream. Sphinx emits various (26) warnings when building make target 'htmldocs'. Currently struct definitions contain duplicate documentation, some as kernel-docs and some as standard c89 comments. We can reduce duplication while cleaning up the kernel docs. Move all kernel-docs to right above each struct member. Use the set of all existing comments (kernel-doc and c89). Add documentation for missing struct members and function arguments. Signed-off-by: Tobin C. Harding Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 5 +- include/linux/jbd2.h | 431 ++++++++++++++++++++++++++---------------- 2 files changed, 272 insertions(+), 164 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5e659ee08d6a..4e5c6103b76c 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -488,8 +488,10 @@ void jbd2_journal_free_reserved(handle_t *handle) EXPORT_SYMBOL(jbd2_journal_free_reserved); /** - * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle + * int jbd2_journal_start_reserved() - start reserved handle * @handle: handle to start + * @type: for handle statistics + * @line_no: for handle statistics * * Start handle that has been previously reserved with jbd2_journal_reserve(). * This attaches @handle to the running transaction (or creates one if there's @@ -619,6 +621,7 @@ error_out: * int jbd2_journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested + * @gfp_mask: memory allocation flags (for start_this_handle) * * Restart a handle for a multi-transaction filesystem * operation. diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index dfaa1f4dcb0c..d073470cb342 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -418,26 +418,41 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode is the structure linking inodes in ordered mode - * present in a transaction so that we can sync them during commit. + * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { - /* Which transaction does this inode belong to? Either the running - * transaction or the committing one. [j_list_lock] */ + /** + * @i_transaction: + * + * Which transaction does this inode belong to? Either the running + * transaction or the committing one. [j_list_lock] + */ transaction_t *i_transaction; - /* Pointer to the running transaction modifying inode's data in case - * there is already a committing transaction touching it. [j_list_lock] */ + /** + * @i_next_transaction: + * + * Pointer to the running transaction modifying inode's data in case + * there is already a committing transaction touching it. [j_list_lock] + */ transaction_t *i_next_transaction; - /* List of inodes in the i_transaction [j_list_lock] */ + /** + * @i_list: List of inodes in the i_transaction [j_list_lock] + */ struct list_head i_list; - /* VFS inode this inode belongs to [constant during the lifetime - * of the structure] */ + /** + * @i_vfs_inode: + * + * VFS inode this inode belongs to [constant for lifetime of structure] + */ struct inode *i_vfs_inode; - /* Flags of inode [j_list_lock] */ + /** + * @i_flags: Flags of inode [j_list_lock] + */ unsigned long i_flags; }; @@ -447,12 +462,20 @@ struct jbd2_revoke_table_s; * struct handle_s - The handle_s type is the concrete type associated with * handle_t. * @h_transaction: Which compound transaction is this update a part of? + * @h_journal: Which journal handle belongs to - used iff h_reserved set. + * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. - * @h_ref: Reference count on this handle - * @h_err: Field for caller's use to track errors through large fs operations - * @h_sync: flag for sync-on-close - * @h_jdata: flag to force data journaling - * @h_aborted: flag indicating fatal error on handle + * @h_ref: Reference count on this handle. + * @h_err: Field for caller's use to track errors through large fs operations. + * @h_sync: Flag for sync-on-close. + * @h_jdata: Flag to force data journaling. + * @h_reserved: Flag for handle for reserved credits. + * @h_aborted: Flag indicating fatal error on handle. + * @h_type: For handle statistics. + * @h_line_no: For handle statistics. + * @h_start_jiffies: Handle Start time. + * @h_requested_credits: Holds @h_buffer_credits after handle is started. + * @saved_alloc_context: Saved context while transaction is open. **/ /* Docbook can't yet cope with the bit fields, but will leave the documentation @@ -462,32 +485,23 @@ struct jbd2_revoke_table_s; struct jbd2_journal_handle { union { - /* Which compound transaction is this update a part of? */ transaction_t *h_transaction; /* Which journal handle belongs to - used iff h_reserved set */ journal_t *h_journal; }; - /* Handle reserved for finishing the logical operation */ handle_t *h_rsv_handle; - - /* Number of remaining buffers we are allowed to dirty: */ int h_buffer_credits; - - /* Reference count on this handle */ int h_ref; - - /* Field for caller's use to track errors through large fs */ - /* operations */ int h_err; /* Flags [no locking] */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ - unsigned int h_reserved: 1; /* handle with reserved credits */ - unsigned int h_aborted: 1; /* fatal error on handle */ - unsigned int h_type: 8; /* for handle statistics */ - unsigned int h_line_no: 16; /* for handle statistics */ + unsigned int h_sync: 1; + unsigned int h_jdata: 1; + unsigned int h_reserved: 1; + unsigned int h_aborted: 1; + unsigned int h_type: 8; + unsigned int h_line_no: 16; unsigned long h_start_jiffies; unsigned int h_requested_credits; @@ -727,228 +741,253 @@ jbd2_time_diff(unsigned long start, unsigned long end) /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. - * @j_flags: General journaling state flags - * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? - * @j_sb_buffer: First part of superblock buffer - * @j_superblock: Second part of superblock buffer - * @j_format_version: Version of the superblock format - * @j_state_lock: Protect the various scalars in the journal - * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself - * @j_running_transaction: The current running transaction.. - * @j_committing_transaction: the transaction we are pushing to disk - * @j_checkpoint_transactions: a linked circular list of all transactions - * waiting for checkpointing - * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction - * to start committing, or for a barrier lock to be released - * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_commit: Wait queue to trigger commit - * @j_wait_updates: Wait queue to wait for updates to complete - * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop - * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints - * @j_head: Journal head - identifies the first unused block in the journal - * @j_tail: Journal tail - identifies the oldest still-used block in the - * journal. - * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block - * @j_last: The block number one beyond the last usable block - * @j_dev: Device where we store the journal - * @j_blocksize: blocksize for the location where we store the journal. - * @j_blk_offset: starting block offset for into the device where we store the - * journal - * @j_fs_dev: Device which holds the client fs. For internal journal this will - * be equal to j_dev - * @j_reserved_credits: Number of buffers reserved from the running transaction - * @j_maxlen: Total maximum capacity of the journal region on disk. - * @j_list_lock: Protects the buffer lists and internal buffer state. - * @j_inode: Optional inode where we store the journal. If present, all journal - * block numbers are mapped into this inode via bmap(). - * @j_tail_sequence: Sequence number of the oldest transaction in the log - * @j_transaction_sequence: Sequence number of the next transaction to grant - * @j_commit_sequence: Sequence number of the most recently committed - * transaction - * @j_commit_request: Sequence number of the most recent transaction wanting - * commit - * @j_uuid: Uuid of client object. - * @j_task: Pointer to the current commit thread for this journal - * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a - * single compound commit transaction - * @j_commit_interval: What is the maximum transaction lifetime before we begin - * a commit? - * @j_commit_timer: The timer used to wakeup the commit thread - * @j_revoke_lock: Protect the revoke table - * @j_revoke: The revoke table - maintains the list of revoked blocks in the - * current transaction. - * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction - * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the - * number that will fit in j_blocksize - * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_history_lock: Protect the transactions statistics history - * @j_proc_entry: procfs entry for the jbd statistics directory - * @j_stats: Overall statistics - * @j_private: An opaque pointer to fs-private information. - * @j_trans_commit_map: Lockdep entity to track transaction commit dependencies */ - struct journal_s { - /* General journaling state flags [j_state_lock] */ + /** + * @j_flags: General journaling state flags [j_state_lock] + */ unsigned long j_flags; - /* + /** + * @j_errno: + * * Is there an outstanding uncleared error on the journal (from a prior * abort)? [j_state_lock] */ int j_errno; - /* The superblock buffer */ + /** + * @j_sb_buffer: The first part of the superblock buffer. + */ struct buffer_head *j_sb_buffer; + + /** + * @j_superblock: The second part of the superblock buffer. + */ journal_superblock_t *j_superblock; - /* Version of the superblock format */ + /** + * @j_format_version: Version of the superblock format. + */ int j_format_version; - /* - * Protect the various scalars in the journal + /** + * @j_state_lock: Protect the various scalars in the journal. */ rwlock_t j_state_lock; - /* + /** + * @j_barrier_count: + * * Number of processes waiting to create a barrier lock [j_state_lock] */ int j_barrier_count; - /* The barrier lock itself */ + /** + * @j_barrier: The barrier lock itself. + */ struct mutex j_barrier; - /* + /** + * @j_running_transaction: + * * Transactions: The current running transaction... * [j_state_lock] [caller holding open handle] */ transaction_t *j_running_transaction; - /* + /** + * @j_committing_transaction: + * * the transaction we are pushing to disk * [j_state_lock] [caller holding open handle] */ transaction_t *j_committing_transaction; - /* + /** + * @j_checkpoint_transactions: + * * ... and a linked circular list of all transactions waiting for * checkpointing. [j_list_lock] */ transaction_t *j_checkpoint_transactions; - /* + /** + * @j_wait_transaction_locked: + * * Wait queue for waiting for a locked transaction to start committing, - * or for a barrier lock to be released + * or for a barrier lock to be released. */ wait_queue_head_t j_wait_transaction_locked; - /* Wait queue for waiting for commit to complete */ + /** + * @j_wait_done_commit: Wait queue for waiting for commit to complete. + */ wait_queue_head_t j_wait_done_commit; - /* Wait queue to trigger commit */ + /** + * @j_wait_commit: Wait queue to trigger commit. + */ wait_queue_head_t j_wait_commit; - /* Wait queue to wait for updates to complete */ + /** + * @j_wait_updates: Wait queue to wait for updates to complete. + */ wait_queue_head_t j_wait_updates; - /* Wait queue to wait for reserved buffer credits to drop */ + /** + * @j_wait_reserved: + * + * Wait queue to wait for reserved buffer credits to drop. + */ wait_queue_head_t j_wait_reserved; - /* Semaphore for locking against concurrent checkpoints */ + /** + * @j_checkpoint_mutex: + * + * Semaphore for locking against concurrent checkpoints. + */ struct mutex j_checkpoint_mutex; - /* + /** + * @j_chkpt_bhs: + * * List of buffer heads used by the checkpoint routine. This * was moved from jbd2_log_do_checkpoint() to reduce stack * usage. Access to this array is controlled by the - * j_checkpoint_mutex. [j_checkpoint_mutex] + * @j_checkpoint_mutex. [j_checkpoint_mutex] */ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; - - /* + + /** + * @j_head: + * * Journal head: identifies the first unused block in the journal. * [j_state_lock] */ unsigned long j_head; - /* + /** + * @j_tail: + * * Journal tail: identifies the oldest still-used block in the journal. * [j_state_lock] */ unsigned long j_tail; - /* + /** + * @j_free: + * * Journal free: how many free blocks are there in the journal? * [j_state_lock] */ unsigned long j_free; - /* - * Journal start and end: the block numbers of the first usable block - * and one beyond the last usable block in the journal. [j_state_lock] + /** + * @j_first: + * + * The block number of the first usable block in the journal + * [j_state_lock]. */ unsigned long j_first; + + /** + * @j_last: + * + * The block number one beyond the last usable block in the journal + * [j_state_lock]. + */ unsigned long j_last; - /* - * Device, blocksize and starting block offset for the location where we - * store the journal. + /** + * @j_dev: Device where we store the journal. */ struct block_device *j_dev; + + /** + * @j_blocksize: Block size for the location where we store the journal. + */ int j_blocksize; + + /** + * @j_blk_offset: + * + * Starting block offset into the device where we store the journal. + */ unsigned long long j_blk_offset; + + /** + * @j_devname: Journal device name. + */ char j_devname[BDEVNAME_SIZE+24]; - /* + /** + * @j_fs_dev: + * * Device which holds the client fs. For internal journal this will be * equal to j_dev. */ struct block_device *j_fs_dev; - /* Total maximum capacity of the journal region on disk. */ + /** + * @j_maxlen: Total maximum capacity of the journal region on disk. + */ unsigned int j_maxlen; - /* Number of buffers reserved from the running transaction */ + /** + * @j_reserved_credits: + * + * Number of buffers reserved from the running transaction. + */ atomic_t j_reserved_credits; - /* - * Protects the buffer lists and internal buffer state. + /** + * @j_list_lock: Protects the buffer lists and internal buffer state. */ spinlock_t j_list_lock; - /* Optional inode where we store the journal. If present, all */ - /* journal block numbers are mapped into this inode via */ - /* bmap(). */ + /** + * @j_inode: + * + * Optional inode where we store the journal. If present, all + * journal block numbers are mapped into this inode via bmap(). + */ struct inode *j_inode; - /* + /** + * @j_tail_sequence: + * * Sequence number of the oldest transaction in the log [j_state_lock] */ tid_t j_tail_sequence; - /* + /** + * @j_transaction_sequence: + * * Sequence number of the next transaction to grant [j_state_lock] */ tid_t j_transaction_sequence; - /* + /** + * @j_commit_sequence: + * * Sequence number of the most recently committed transaction * [j_state_lock]. */ tid_t j_commit_sequence; - /* + /** + * @j_commit_request: + * * Sequence number of the most recent transaction wanting commit * [j_state_lock] */ tid_t j_commit_request; - /* + /** + * @j_uuid: + * * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single @@ -956,85 +995,151 @@ struct journal_s */ __u8 j_uuid[16]; - /* Pointer to the current commit thread for this journal */ + /** + * @j_task: Pointer to the current commit thread for this journal. + */ struct task_struct *j_task; - /* + /** + * @j_max_transaction_buffers: + * * Maximum number of metadata buffers to allow in a single compound - * commit transaction + * commit transaction. */ int j_max_transaction_buffers; - /* + /** + * @j_commit_interval: + * * What is the maximum transaction lifetime before we begin a commit? */ unsigned long j_commit_interval; - /* The timer used to wakeup the commit thread: */ + /** + * @j_commit_timer: The timer used to wakeup the commit thread. + */ struct timer_list j_commit_timer; - /* - * The revoke table: maintains the list of revoked blocks in the - * current transaction. [j_revoke_lock] + /** + * @j_revoke_lock: Protect the revoke table. */ spinlock_t j_revoke_lock; + + /** + * @j_revoke: + * + * The revoke table - maintains the list of revoked blocks in the + * current transaction. + */ struct jbd2_revoke_table_s *j_revoke; + + /** + * @j_revoke_table: Alternate revoke tables for j_revoke. + */ struct jbd2_revoke_table_s *j_revoke_table[2]; - /* - * array of bhs for jbd2_journal_commit_transaction + /** + * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ struct buffer_head **j_wbuf; + + /** + * @j_wbufsize: + * + * Size of @j_wbuf array. + */ int j_wbufsize; - /* - * this is the pid of hte last person to run a synchronous operation - * through the journal + /** + * @j_last_sync_writer: + * + * The pid of the last person to run a synchronous operation + * through the journal. */ pid_t j_last_sync_writer; - /* - * the average amount of time in nanoseconds it takes to commit a + /** + * @j_average_commit_time: + * + * The average amount of time in nanoseconds it takes to commit a * transaction to disk. [j_state_lock] */ u64 j_average_commit_time; - /* - * minimum and maximum times that we should wait for - * additional filesystem operations to get batched into a - * synchronous handle in microseconds + /** + * @j_min_batch_time: + * + * Minimum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. */ u32 j_min_batch_time; + + /** + * @j_max_batch_time: + * + * Maximum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. + */ u32 j_max_batch_time; - /* This function is called when a transaction is closed */ + /** + * @j_commit_callback: + * + * This function is called when a transaction is closed. + */ void (*j_commit_callback)(journal_t *, transaction_t *); /* * Journal statistics */ + + /** + * @j_history_lock: Protect the transactions statistics history. + */ spinlock_t j_history_lock; + + /** + * @j_proc_entry: procfs entry for the jbd statistics directory. + */ struct proc_dir_entry *j_proc_entry; + + /** + * @j_stats: Overall statistics. + */ struct transaction_stats_s j_stats; - /* Failed journal commit ID */ + /** + * @j_failed_commit: Failed journal commit ID. + */ unsigned int j_failed_commit; - /* + /** + * @j_private: + * * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here + * superblock pointer here. */ void *j_private; - /* Reference to checksum algorithm driver via cryptoapi */ + /** + * @j_chksum_driver: + * + * Reference to checksum algorithm driver via cryptoapi. + */ struct crypto_shash *j_chksum_driver; - /* Precomputed journal UUID checksum for seeding other checksums */ + /** + * @j_csum_seed: + * + * Precomputed journal UUID checksum for seeding other checksums. + */ __u32 j_csum_seed; #ifdef CONFIG_DEBUG_LOCK_ALLOC - /* + /** + * @j_trans_commit_map: + * * Lockdep entity to track transaction commit dependencies. Handles * hold this "lock" for read, when we wait for commit, we acquire the * "lock" for writing. This matches the properties of jbd2 journalling From 539deabfccb0a5d7e17daa432b385ffe608e0ccb Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Wed, 10 Jan 2018 00:13:13 -0500 Subject: [PATCH 490/705] ext4: fix a race in the ext4 shutdown path commit abbc3f9395c76d554a9ed27d4b1ebfb5d9b0e4ca upstream. This patch fixes a race between the shutdown path and bio completion handling. In the ext4 direct io path with async io, after submitting a bio to the block layer, if journal starting fails, ext4_direct_IO_write() would bail out pretending that the IO failed. The caller would have had no way of knowing whether or not the IO was successfully submitted. So instead, we return -EIOCBQUEUED in this case. Now, the caller knows that the IO was submitted. The bio completion handler takes care of the error. Tested: Ran the shutdown xfstest test 461 in loop for over 2 hours across 4 machines resulting in over 400 runs. Verified that the race didn't occur. Usually the race was seen in about 20-30 iterations. Signed-off-by: Harshad Shirwadkar Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ec28e8ebb984..5cccec68a0a5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3526,10 +3526,18 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) /* Credits for sb + inode write */ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); + /* + * We wrote the data but cannot extend + * i_size. Bail out. In async io case, we do + * not return error here because we have + * already submmitted the corresponding + * bio. Returning error here makes the caller + * think that this IO is done and failed + * resulting in race with bio's completion + * handler. + */ + if (!ret) + ret = PTR_ERR(handle); if (inode->i_nlink) ext4_orphan_del(NULL, inode); From 4a36f437bed1305495196274b08cf5b8d9b4a76d Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 10 Jan 2018 00:34:19 -0500 Subject: [PATCH 491/705] ext4: save error to disk in __ext4_grp_locked_error() commit 06f29cc81f0350261f59643a505010531130eea0 upstream. In the function __ext4_grp_locked_error(), __save_error_info() is called to save error info in super block block, but does not sync that information to disk to info the subsequence fsck after reboot. This patch writes the error information to disk. After this patch, I think there is no obvious EXT4 error handle branches which leads to "Remounting filesystem read-only" will leave the disk partition miss the subsequence fsck. Signed-off-by: Zhouyi Zhou Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1f581791b39d..1ec4b6e34747 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -720,6 +720,7 @@ __acquires(bitlock) } ext4_unlock_group(sb, grp); + ext4_commit_super(sb, 1); ext4_handle_error(sb); /* * We only get here in the ERRORS_RO case; relocking the group From f0b59c20145b4d8a0739960b2ad2b292fa365ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Thu, 11 Jan 2018 13:43:33 -0500 Subject: [PATCH 492/705] ext4: correct documentation for grpid mount option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9f0372488cc9243018a812e8cfbf27de650b187b upstream. The grpid option is currently described as being the same as nogrpid. Signed-off-by: Ernesto A. Fernández Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/ext4.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 6c0108eb0137..2139ea253142 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -233,7 +233,7 @@ data_err=ignore(*) Just print an error message if an error occurs data_err=abort Abort the journal if an error occurs in a file data buffer in ordered mode. -grpid Give objects the same group ID as their creator. +grpid New objects have the group ID of their parent. bsdgroups nogrpid (*) New objects have the group ID of their creator. From 7318454518ad56ad38b60dbb3d185fd14d0d0b93 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2018 16:25:53 +0100 Subject: [PATCH 493/705] mm: hide a #warning for COMPILE_TEST commit af27d9403f5b80685b79c88425086edccecaf711 upstream. We get a warning about some slow configurations in randconfig kernels: mm/memory.c:83:2: error: #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. [-Werror=cpp] The warning is reasonable by itself, but gets in the way of randconfig build testing, so I'm hiding it whenever CONFIG_COMPILE_TEST is set. The warning was added in 2013 in commit 75980e97dacc ("mm: fold page->_last_nid into page->flags where possible"). Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 1aa63e7dd790..e2e68767a373 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -75,7 +75,7 @@ #include "internal.h" -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif From cbd0c0fc54e2fa27d05779cb984fe9c989652be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 19 Jan 2018 16:27:54 -0800 Subject: [PATCH 494/705] mm: Fix memory size alignment in devm_memremap_pages_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 10a0cd6e4932b5078215b1ec2c896597eec0eff9 upstream. The functions devm_memremap_pages() and devm_memremap_pages_release() use different ways to calculate the section-aligned amount of memory. The latter function may use an incorrect size if the memory region is small but straddles a section border. Use the same code for both. Cc: Fixes: 5f29a77cd957 ("mm: fix mixed zone detection in devm_memremap_pages") Signed-off-by: Jan H. Schönherr Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 06123234f118..426547a21a0c 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -245,7 +245,8 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(resource_size(res), SECTION_SIZE); + align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) + - align_start; lock_device_hotplug(); mem_hotplug_begin(); From 3d95c4faf85e3c5b67cda113d13bf701a40ee1d0 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 17 Jan 2018 19:56:38 +0100 Subject: [PATCH 495/705] MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN commit 2e6522c565522a2e18409c315c49d78c8b74807b upstream. MIPS_GENERIC selects some options conditional on BIG_ENDIAN which does not exist. Replace BIG_ENDIAN with CPU_BIG_ENDIAN which is the correct kconfig name. Note that BMIPS_GENERIC does the same which confirms that this patch is needed. Fixes: eed0eabd12ef0 ("MIPS: generic: Introduce generic DT-based board support") Signed-off-by: Corentin Labbe Reviewed-by: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 4.9+ Patchwork: https://patchwork.linux-mips.org/patch/18495/ [jhogan@kernel.org: Clean up commit message] Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5e844f68e847..2d2fd79ced9d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -112,12 +112,12 @@ config MIPS_GENERIC select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_RELOCATABLE select SYS_SUPPORTS_SMARTMIPS - select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN - select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN - select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN + select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF help Select this to build a kernel which aims to support multiple boards, From e1afa7bb38d5379f29fa015e9402e946a226afe8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2017 14:38:31 +0100 Subject: [PATCH 496/705] PCI: keystone: Fix interrupt-controller-node lookup commit eac56aa3bc8af3d9b9850345d0f2da9d83529134 upstream. Fix child-node lookup during initialisation which was using the wrong OF-helper and ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent pci node could end up being prematurely freed as of_find_node_by_name() drops a reference to its first argument. Any matching child interrupt-controller node was also leaked. Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver") Cc: stable # 3.18 Acked-by: Murali Karicheri Signed-off-by: Johan Hovold [lorenzo.pieralisi@arm.com: updated commit subject] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-keystone.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c index 043c19a05da1..eac0a1238e9d 100644 --- a/drivers/pci/host/pci-keystone.c +++ b/drivers/pci/host/pci-keystone.c @@ -181,7 +181,7 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, } /* interrupt controller is in a child node */ - *np_temp = of_find_node_by_name(np_pcie, controller); + *np_temp = of_get_child_by_name(np_pcie, controller); if (!(*np_temp)) { dev_err(dev, "Node for %s is absent\n", controller); return -EINVAL; @@ -190,6 +190,7 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, temp = of_irq_count(*np_temp); if (!temp) { dev_err(dev, "No IRQ entries in %s\n", controller); + of_node_put(*np_temp); return -EINVAL; } @@ -207,6 +208,8 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, break; } + of_node_put(*np_temp); + if (temp) { *num_irqs = temp; return 0; From dca0dc604aa37fda4681227250522dd9abe47298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 29 Dec 2017 19:48:43 +0100 Subject: [PATCH 497/705] video: fbdev: atmel_lcdfb: fix display-timings lookup commit 9cb18db0701f6b74f0c45c23ad767b3ebebe37f6 upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent display node was also prematurely freed. Note that the display and timings node references are never put after a successful dt-initialisation so the nodes would leak on later probe deferrals and on driver unbind. Fixes: b985172b328a ("video: atmel_lcdfb: add device tree suport") Cc: stable # 3.13 Cc: Jean-Christophe PLAGNIOL-VILLARD Cc: Nicolas Ferre Cc: Alexandre Belloni Signed-off-by: Johan Hovold Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/atmel_lcdfb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index 669ecc755fa9..8f439fd58db6 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -1119,7 +1119,7 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo) goto put_display_node; } - timings_np = of_find_node_by_name(display_np, "display-timings"); + timings_np = of_get_child_by_name(display_np, "display-timings"); if (!timings_np) { dev_err(dev, "failed to find display-timings node\n"); ret = -ENODEV; @@ -1140,6 +1140,12 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo) fb_add_videomode(&fb_vm, &info->modelist); } + /* + * FIXME: Make sure we are not referencing any fields in display_np + * and timings_np and drop our references to them before returning to + * avoid leaking the nodes on probe deferral and driver unbind. + */ + return 0; put_timings_node: From 81c1ef9a0fcf696f443346ff2315e698a1d2b69e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 Jan 2018 17:04:22 +0100 Subject: [PATCH 498/705] console/dummy: leave .con_font_get set to NULL commit 724ba8b30b044aa0d94b1cd374fc15806cdd6f18 upstream. When this method is set, the caller expects struct console_font fields to be properly initialized when it returns. Leave it unset otherwise nonsensical (leaked kernel stack) values are returned to user space. Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/dummycon.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/console/dummycon.c b/drivers/video/console/dummycon.c index 9269d5685239..b90ef96e43d6 100644 --- a/drivers/video/console/dummycon.c +++ b/drivers/video/console/dummycon.c @@ -67,7 +67,6 @@ const struct consw dummy_con = { .con_switch = DUMMY, .con_blank = DUMMY, .con_font_set = DUMMY, - .con_font_get = DUMMY, .con_font_default = DUMMY, .con_font_copy = DUMMY, }; From 28130f4d2340a87ea4b84e55f43016ebf86b9a2e Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 5 Feb 2018 12:38:11 -0600 Subject: [PATCH 499/705] rtlwifi: rtl8821ae: Fix connection lost problem correctly commit c713fb071edc0efc01a955f65a006b0e1795d2eb upstream. There has been a coding error in rtl8821ae since it was first introduced, namely that an 8-bit register was read using a 16-bit read in _rtl8821ae_dbi_read(). This error was fixed with commit 40b368af4b75 ("rtlwifi: Fix alignment issues"); however, this change led to instability in the connection. To restore stability, this change was reverted in commit b8b8b16352cd ("rtlwifi: rtl8821ae: Fix connection lost problem"). Unfortunately, the unaligned access causes machine checks in ARM architecture, and we were finally forced to find the actual cause of the problem on x86 platforms. Following a suggestion from Pkshih , it was found that increasing the ASPM L1 latency from 0 to 7 fixed the instability. This parameter was varied to see if a smaller value would work; however, it appears that 7 is the safest value. A new symbol is defined for this quantity, thus it can be easily changed if necessary. Fixes: b8b8b16352cd ("rtlwifi: rtl8821ae: Fix connection lost problem") Cc: Stable # 4.14+ Fix-suggested-by: Pkshih Signed-off-by: Larry Finger Tested-by: James Cameron # x86_64 OLPC NL3 Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 5 +++-- drivers/net/wireless/realtek/rtlwifi/wifi.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 82d53895ce4d..0c3fe177fd14 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1128,7 +1128,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_word(rtlpriv, read_addr); + ret = rtl_read_byte(rtlpriv, read_addr); } return ret; } @@ -1170,7 +1170,8 @@ static void _rtl8821ae_enable_aspm_back_door(struct ieee80211_hw *hw) } tmp = _rtl8821ae_dbi_read(rtlpriv, 0x70f); - _rtl8821ae_dbi_write(rtlpriv, 0x70f, tmp | BIT(7)); + _rtl8821ae_dbi_write(rtlpriv, 0x70f, tmp | BIT(7) | + ASPM_L1_LATENCY << 3); tmp = _rtl8821ae_dbi_read(rtlpriv, 0x719); _rtl8821ae_dbi_write(rtlpriv, 0x719, tmp | BIT(3) | BIT(4)); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index dafe486f8448..340e7b324ef8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -99,6 +99,7 @@ #define RTL_USB_MAX_RX_COUNT 100 #define QBSS_LOAD_SIZE 5 #define MAX_WMMELE_LENGTH 64 +#define ASPM_L1_LATENCY 7 #define TOTAL_CAM_ENTRY 32 From fffc0fcaebebefd06b07838e4aad5ef5f30803fd Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 13 Dec 2017 18:22:30 +0100 Subject: [PATCH 500/705] target/iscsi: avoid NULL dereference in CHAP auth error path commit ce512d79d0466a604793addb6b769d12ee326822 upstream. If chap_server_compute_md5() fails early, e.g. via CHAP_N mismatch, then crypto_free_shash() is called with a NULL pointer which gets dereferenced in crypto_shash_tfm(). Fixes: 69110e3cedbb ("iscsi-target: Use shash and ahash") Suggested-by: Markus Elfring Signed-off-by: David Disseldorp Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_auth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index e116f0e845c0..98f75e5811c8 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -413,7 +413,8 @@ static int chap_server_compute_md5( auth_ret = 0; out: kzfree(desc); - crypto_free_shash(tfm); + if (tfm) + crypto_free_shash(tfm); kfree(challenge); kfree(challenge_binhex); return auth_ret; From ecd72fd604b1f3aa3ec03962e4676760a8e1a1b4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:50 -0700 Subject: [PATCH 501/705] Btrfs: fix deadlock in run_delalloc_nocow commit e89166990f11c3f21e1649d760dd35f9e410321c upstream. @cur_offset is not set back to what it should be (@cow_start) if btrfs_next_leaf() returns something wrong, and the range [cow_start, cur_offset) remains locked forever. cc: Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a8a1fb40e258..c3d413494779 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1320,8 +1320,11 @@ next_slot: leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); - if (ret < 0) + if (ret < 0) { + if (cow_start != (u64)-1) + cur_offset = cow_start; goto error; + } if (ret > 0) break; leaf = path->nodes[0]; From 0f4adc1468fe5495e25e6da520d438f947f6cd7e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:51 -0700 Subject: [PATCH 502/705] Btrfs: fix crash due to not cleaning up tree log block's dirty bits commit 1846430c24d66e85cc58286b3319c82cd54debb2 upstream. In cases that the whole fs flips into readonly status due to failures in critical sections, then log tree's blocks are still dirty, and this leads to a crash during umount time, the crash is about use-after-free, umount -> close_ctree -> stop workers -> iput(btree_inode) -> iput_final -> write_inode_now -> ... -> queue job on stop'd workers cc: v3.12+ Fixes: 681ae50917df ("Btrfs: cleanup reserved space when freeing tree log on error") Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 309313b71617..f547d80b5bf1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2463,6 +2463,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); + } else { + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) + clear_extent_buffer_dirty(next); } WARN_ON(root_owner != @@ -2542,6 +2545,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); + } else { + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) + clear_extent_buffer_dirty(next); } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); @@ -2618,6 +2624,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, clean_tree_block(trans, log->fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); + } else { + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) + clear_extent_buffer_dirty(next); } WARN_ON(log->root_key.objectid != From bc0d431e74adb5967fe39bc07aad68a6a865612d Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:52 -0700 Subject: [PATCH 503/705] Btrfs: fix extent state leak from tree log commit 55237a5f2431a72435e3ed39e4306e973c0446b7 upstream. It's possible that btrfs_sync_log() bails out after one of the two btrfs_write_marked_extents() which convert extent state's state bit into EXTENT_NEED_WAIT from EXTENT_DIRTY/EXTENT_NEW, however only EXTENT_DIRTY and EXTENT_NEW are searched by free_log_tree() so that those extent states with EXTENT_NEED_WAIT lead to memory leak. cc: Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f547d80b5bf1..1dca7d272348 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3013,13 +3013,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans, while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, - 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, + 0, &start, &end, + EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT, NULL); if (ret) break; clear_extent_bits(&log->dirty_log_pages, start, end, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); } /* From b48edd6d7658a9fdb05c162f1f099245e7cd22e5 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:53 -0700 Subject: [PATCH 504/705] Btrfs: fix btrfs_evict_inode to handle abnormal inodes correctly commit e8f1bc1493855e32b7a2a019decc3c353d94daf6 upstream. This regression is introduced in commit 3d48d9810de4 ("btrfs: Handle uninitialised inode eviction"). There are two problems, a) it is ->destroy_inode() that does the final free on inode, not ->evict_inode(), b) clear_inode() must be called before ->evict_inode() returns. This could end up hitting BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); in evict() because I_CLEAR is set in clear_inode(). Fixes: commit 3d48d9810de4 ("btrfs: Handle uninitialised inode eviction") Cc: # v4.7-rc6+ Signed-off-by: Liu Bo Reviewed-by: Nikolay Borisov Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c3d413494779..d196ce4be31c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5229,7 +5229,7 @@ void btrfs_evict_inode(struct inode *inode) trace_btrfs_inode_evict(inode); if (!root) { - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); + clear_inode(inode); return; } From efb1cbc229f122bc1d1b5e85bdd2da971c953f2e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:56 -0700 Subject: [PATCH 505/705] Btrfs: fix unexpected -EEXIST when creating new inode commit 900c9981680067573671ecc5cbfa7c5770be3a40 upstream. The highest objectid, which is assigned to new inode, is decided at the time of initializing fs roots. However, in cases where log replay gets processed, the btree which fs root owns might be changed, so we have to search it again for the highest objectid, otherwise creating new inode would end up with -EEXIST. cc: v4.4-rc6+ Fixes: f32e48e92596 ("Btrfs: Initialize btrfs_root->highest_objectid when loading tree root and subvolume roots") Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1dca7d272348..5539f0b95efa 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -28,6 +28,7 @@ #include "hash.h" #include "compression.h" #include "qgroup.h" +#include "inode-map.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -5661,6 +5662,23 @@ again: path); } + if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { + struct btrfs_root *root = wc.replay_dest; + + btrfs_release_path(path); + + /* + * We have just replayed everything, and the highest + * objectid of fs roots probably has changed in case + * some inode_item's got replayed. + * + * root->objectid_mutex is not acquired as log replay + * could only happen during mount. + */ + ret = btrfs_find_highest_objectid(root, + &root->highest_objectid); + } + key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); From 39e0a6bd07182815062c74fc73ce0ea94578ce00 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 22 Jan 2018 22:02:05 +0100 Subject: [PATCH 506/705] 9p/trans_virtio: discard zero-length reply commit 26d99834f89e76514076d9cd06f61e56e6a509b8 upstream. When a 9p request is successfully flushed, the server is expected to just mark it as used without sending a 9p reply (ie, without writing data into the buffer). In this case, virtqueue_get_buf() will return len == 0 and we must not report a REQ_STATUS_RCVD status to the client, otherwise the client will erroneously assume the request has not been flushed. Cc: stable@vger.kernel.org Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index f3a4efcf1456..3aa5a93ad107 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -160,7 +160,8 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ wake_up(chan->vc_wq); - p9_client_cb(chan->client, req, REQ_STATUS_RCVD); + if (len) + p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } } From 921f860ade863666b8b4be9ec5d0347bb8891ea4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 9 Feb 2018 13:21:42 +0100 Subject: [PATCH 507/705] mtd: nand: vf610: set correct ooblayout commit ea56fb282368ea08c2a313af6b55cb597aec4db1 upstream. With commit 3cf32d180227 ("mtd: nand: vf610: switch to mtd_ooblayout_ops") the driver started to use the NAND cores default large page ooblayout. However, shortly after commit 6a623e076944 ("mtd: nand: add ooblayout for old hamming layout") changed the default layout to the old hamming layout, which is not what vf610_nfc is using. Specify the default large page layout explicitly. Fixes: 6a623e076944 ("mtd: nand: add ooblayout for old hamming layout") Cc: # v4.12+ Signed-off-by: Stefan Agner Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/vf610_nfc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c index 3ad514c44dcb..ddc629e3f63a 100644 --- a/drivers/mtd/nand/vf610_nfc.c +++ b/drivers/mtd/nand/vf610_nfc.c @@ -752,10 +752,8 @@ static int vf610_nfc_probe(struct platform_device *pdev) if (mtd->oobsize > 64) mtd->oobsize = 64; - /* - * mtd->ecclayout is not specified here because we're using the - * default large page ECC layout defined in NAND core. - */ + /* Use default large page ECC layout defined in NAND core */ + mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); if (chip->ecc.strength == 32) { nfc->ecc_mode = ECC_60_BYTE; chip->ecc.bytes = 60; From 3dc694f4a436ec5f8017161ff8e8a8e49856014d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 29 Jan 2018 14:23:15 +0800 Subject: [PATCH 508/705] ALSA: hda - Fix headset mic detection problem for two Dell machines commit 3f2f7c553d077be6a30cb96b2976a2c940bf5335 upstream. One of them has the codec of alc256 and the other one has the codec of alc289. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 71a058fcf884..bed98b2b6a30 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5993,6 +5993,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0xb7a60130}, {0x14, 0x90170110}, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x14, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, @@ -6049,6 +6054,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60120}, {0x14, 0x90170110}, {0x21, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0xb7a60130}, + {0x14, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0290, 0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1, ALC290_STANDARD_PINS, {0x15, 0x04211040}, From 31cb8df3f8ea21d1d33d371b160fddd903888e19 Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Mon, 29 Jan 2018 06:37:55 +0100 Subject: [PATCH 509/705] ALSA: usb-audio: Fix UAC2 get_ctl request with a RANGE attribute commit 447cae58cecd69392b74a4a42cd0ab9cabd816af upstream. The layout of the UAC2 Control request and response varies depending on the request type. With the current implementation, only the Layout 2 Parameter Block (with the 2-byte sized RANGE attribute) is handled properly. For the Control requests with the 1-byte sized RANGE attribute (Bass Control, Mid Control, Tremble Control), the response is parsed incorrectly. This commit: * fixes the wLength field value in the request * fixes parsing the range values from the response Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0") Signed-off-by: Kirill Marinushkin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 08015c139116..dedf8eb4570e 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -344,17 +344,20 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret) { struct snd_usb_audio *chip = cval->head.mixer->chip; - unsigned char buf[4 + 3 * sizeof(__u32)]; /* enough space for one range */ + /* enough space for one range */ + unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)]; unsigned char *val; - int idx = 0, ret, size; + int idx = 0, ret, val_size, size; __u8 bRequest; + val_size = uac2_ctl_value_size(cval->val_type); + if (request == UAC_GET_CUR) { bRequest = UAC2_CS_CUR; - size = uac2_ctl_value_size(cval->val_type); + size = val_size; } else { bRequest = UAC2_CS_RANGE; - size = sizeof(buf); + size = sizeof(__u16) + 3 * val_size; } memset(buf, 0, sizeof(buf)); @@ -387,16 +390,17 @@ error: val = buf + sizeof(__u16); break; case UAC_GET_MAX: - val = buf + sizeof(__u16) * 2; + val = buf + sizeof(__u16) + val_size; break; case UAC_GET_RES: - val = buf + sizeof(__u16) * 3; + val = buf + sizeof(__u16) + val_size * 2; break; default: return -EINVAL; } - *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16))); + *value_ret = convert_signed_value(cval, + snd_usb_combine_bytes(val, val_size)); return 0; } From d8fff0e75a0b229c42360a22866768e1b863e079 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 2 Feb 2018 15:26:46 +0800 Subject: [PATCH 510/705] ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform commit 61fcf8ece9b6b09450250c4ca40cc3b81a96a68d upstream. Thinkpad Dock device support for ALC298 platform. It need to use SSID for the quirk table. Because IdeaPad also has ALC298 platform. Use verb for the quirk table will confuse. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bed98b2b6a30..8cd7840b3dab 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4455,6 +4455,28 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, } } +static void alc_fixup_tpt470_dock(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x17, 0x21211010 }, /* dock headphone */ + { 0x19, 0x21a11010 }, /* dock mic */ + { } + }; + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; + /* Enable DOCK device */ + snd_hda_codec_write(codec, 0x17, 0, + AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0); + /* Enable DOCK device */ + snd_hda_codec_write(codec, 0x19, 0, + AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0); + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4877,6 +4899,7 @@ enum { ALC292_FIXUP_TPT460, ALC298_FIXUP_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, + ALC298_FIXUP_TPT470_DOCK, }; static const struct hda_fixup alc269_fixups[] = { @@ -5568,6 +5591,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC298_FIXUP_TPT470_DOCK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_tpt470_dock, + .chained = true, + .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5729,8 +5758,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x222d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), @@ -5749,7 +5786,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ From bb1a422bd6e93fa0899e0e2ea1ce488ac651d0b4 Mon Sep 17 00:00:00 2001 From: Jan-Marek Glogowski Date: Wed, 14 Feb 2018 11:29:15 +0100 Subject: [PATCH 511/705] ALSA: hda/realtek: PCI quirk for Fujitsu U7x7 commit fdcc968a3b290407bcba9d4c90e2fba6d8d928f1 upstream. These laptops have a combined jack to attach headsets, the U727 on the left, the U757 on the right, but a headsets microphone doesn't work. Using hdajacksensetest I found that pin 0x19 changed the present state when plugging the headset, in addition to 0x21, but didn't have the correct configuration (shown as "Not connected"). So this sets the configuration to the same values as the headphone pin 0x21 except for the device type microphone, which makes it work correctly. With the patch the configured pins for U727 are Pin 0x12 (Internal Mic, Mobile-In): present = No Pin 0x14 (Internal Speaker): present = No Pin 0x19 (Black Mic, Left side): present = No Pin 0x1d (Internal Aux): present = No Pin 0x21 (Black Headphone, Left side): present = No Signed-off-by: Jan-Marek Glogowski Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8cd7840b3dab..89c166b97e81 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3130,6 +3130,19 @@ static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; } +static void alc269_fixup_pincfg_U7x7_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + unsigned int cfg_headphone = snd_hda_codec_get_pincfg(codec, 0x21); + unsigned int cfg_headset_mic = snd_hda_codec_get_pincfg(codec, 0x19); + + if (cfg_headphone && cfg_headset_mic == 0x411111f0) + snd_hda_codec_set_pincfg(codec, 0x19, + (cfg_headphone & ~AC_DEFCFG_DEVICE) | + (AC_JACK_MIC_IN << AC_DEFCFG_DEVICE_SHIFT)); +} + static void alc269_fixup_hweq(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4819,6 +4832,7 @@ enum { ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_LIFEBOOK_HP_PIN, ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, + ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -5010,6 +5024,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_pincfg_no_hp_to_lineout, }, + [ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_U7x7_headset_mic, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5733,6 +5751,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), + SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), From 344c9ac65ea678fafaff5074361dead24a038c27 Mon Sep 17 00:00:00 2001 From: Lassi Ylikojola Date: Fri, 9 Feb 2018 16:51:36 +0200 Subject: [PATCH 512/705] ALSA: usb-audio: add implicit fb quirk for Behringer UFX1204 commit 5e35dc0338d85ccebacf3f77eca1e5dea73155e8 upstream. Add quirk to ensure a sync endpoint is properly configured. This patch is a fix for same symptoms on Behringer UFX1204 as patch from Albertto Aquirre on Dec 8 2016 for Axe-Fx II. Signed-off-by: Lassi Ylikojola Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index cf8459a6fad8..c5dfe82beb24 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -352,6 +352,15 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x86; iface = usb_ifnum_to_if(dev, 2); + if (!iface || iface->num_altsetting == 0) + return -EINVAL; + + alts = &iface->altsetting[1]; + goto add_sync_ep; + case USB_ID(0x1397, 0x0002): + ep = 0x81; + iface = usb_ifnum_to_if(dev, 1); + if (!iface || iface->num_altsetting == 0) return -EINVAL; From 869182f45e38e4c62722b20a5c6f4bc48b2e60c3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 12 Feb 2018 15:20:51 +0100 Subject: [PATCH 513/705] ALSA: seq: Fix racy pool initializations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d15d662e89fc667b90cd294b0eb45694e33144da upstream. ALSA sequencer core initializes the event pool on demand by invoking snd_seq_pool_init() when the first write happens and the pool is empty. Meanwhile user can reset the pool size manually via ioctl concurrently, and this may lead to UAF or out-of-bound accesses since the function tries to vmalloc / vfree the buffer. A simple fix is to just wrap the snd_seq_pool_init() call with the recently introduced client->ioctl_mutex; as the calls for snd_seq_pool_init() from other side are always protected with this mutex, we can avoid the race. Reported-by: 范龙飞 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 16580a82e1c8..0b408617b2c9 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -999,7 +999,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf, { struct snd_seq_client *client = file->private_data; int written = 0, len; - int err = -EINVAL; + int err; struct snd_seq_event event; if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT)) @@ -1014,11 +1014,15 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf, /* allocate the pool now if the pool is not allocated yet */ if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) { - if (snd_seq_pool_init(client->pool) < 0) + mutex_lock(&client->ioctl_mutex); + err = snd_seq_pool_init(client->pool); + mutex_unlock(&client->ioctl_mutex); + if (err < 0) return -ENOMEM; } /* only process whole events */ + err = -EINVAL; while (count >= sizeof(struct snd_seq_event)) { /* Read in the event header from the user */ len = sizeof(event); From c9aca68ee52c86ce7b5037b76808dd0519edd110 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 11 Feb 2018 18:10:28 -0500 Subject: [PATCH 514/705] mvpp2: fix multicast address filter commit 7ac8ff95f48cbfa609a060fd6a1e361dd62feeb3 upstream. IPv6 doesn't work on the MacchiatoBIN board. It is caused by broken multicast address filter in the mvpp2 driver. The driver loads doesn't load any multicast entries if "allmulti" is not set. This condition should be reversed. The condition !netdev_mc_empty(dev) is useless (because netdev_for_each_mc_addr is nop if the list is empty). This patch also fixes a possible overflow of the multicast list - if mvpp2_prs_mac_da_accept fails, we set the allmulti flag and retry. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index ed6fae964ec5..7e2ebfc565ee 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5657,6 +5657,7 @@ static void mvpp2_set_rx_mode(struct net_device *dev) int id = port->id; bool allmulti = dev->flags & IFF_ALLMULTI; +retry: mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC); mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti); mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti); @@ -5664,9 +5665,13 @@ static void mvpp2_set_rx_mode(struct net_device *dev) /* Remove all port->id's mcast enries */ mvpp2_prs_mcast_del_all(priv, id); - if (allmulti && !netdev_mc_empty(dev)) { - netdev_for_each_mc_addr(ha, dev) - mvpp2_prs_mac_da_accept(priv, id, ha->addr, true); + if (!allmulti) { + netdev_for_each_mc_addr(ha, dev) { + if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) { + allmulti = true; + goto retry; + } + } } } From b2a6141782cd9934e8810e1b73b13e5252811e0c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 31 Jan 2018 22:24:45 +0000 Subject: [PATCH 515/705] usb: Move USB_UHCI_BIG_ENDIAN_* out of USB_SUPPORT commit ec897569ad7dbc6d595873a487c3fac23f463f76 upstream. Move the Kconfig symbols USB_UHCI_BIG_ENDIAN_MMIO and USB_UHCI_BIG_ENDIAN_DESC out of drivers/usb/host/Kconfig, which is conditional upon USB && USB_SUPPORT, so that it can be freely selected by platform Kconfig symbols in architecture code. For example once the MIPS_GENERIC platform selects are fixed in commit 2e6522c56552 ("MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN"), the MIPS 32r6_defconfig warns like so: warning: (MIPS_GENERIC) selects USB_UHCI_BIG_ENDIAN_MMIO which has unmet direct dependencies (USB_SUPPORT && USB) warning: (MIPS_GENERIC) selects USB_UHCI_BIG_ENDIAN_DESC which has unmet direct dependencies (USB_SUPPORT && USB) Fixes: 2e6522c56552 ("MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN") Signed-off-by: James Hogan Cc: Corentin Labbe Cc: Ralf Baechle Cc: Paul Burton Cc: linux-usb@vger.kernel.org Cc: linux-mips@linux-mips.org Acked-by: Greg Kroah-Hartman Patchwork: https://patchwork.linux-mips.org/patch/18559/ Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Kconfig | 8 ++++++++ drivers/usb/host/Kconfig | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 644e978cbd3e..0103f777b97a 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -19,6 +19,14 @@ config USB_EHCI_BIG_ENDIAN_MMIO config USB_EHCI_BIG_ENDIAN_DESC bool +config USB_UHCI_BIG_ENDIAN_MMIO + bool + default y if SPARC_LEON + +config USB_UHCI_BIG_ENDIAN_DESC + bool + default y if SPARC_LEON + menuconfig USB_SUPPORT bool "USB support" depends on HAS_IOMEM diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index eb121b2a55d4..0e7cc71b34a9 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -628,14 +628,6 @@ config USB_UHCI_PLATFORM bool default y if ARCH_VT8500 -config USB_UHCI_BIG_ENDIAN_MMIO - bool - default y if SPARC_LEON - -config USB_UHCI_BIG_ENDIAN_DESC - bool - default y if SPARC_LEON - config USB_FHCI_HCD tristate "Freescale QE USB Host Controller support" depends on OF_GPIO && QE_GPIO && QUICC_ENGINE From e6701adbbabbb078bf332bf99bd05eae7a4c64ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 15 Feb 2018 20:00:15 +1100 Subject: [PATCH 516/705] dm: correctly handle chained bios in dec_pending() commit 8dd601fa8317243be887458c49f6c29c2f3d719f upstream. dec_pending() is given an error status (possibly 0) to be recorded against a bio. It can be called several times on the one 'struct dm_io', and it is careful to only assign a non-zero error to io->status. However when it then assigned io->status to bio->bi_status, it is not careful and could overwrite a genuine error status with 0. This can happen when chained bios are in use. If a bio is chained beneath the bio that this dm_io is handling, the child bio might complete and set bio->bi_status before the dm_io completes. This has been possible since chained bios were introduced in 3.14, and has become a lot easier to trigger with commit 18a25da84354 ("dm: ensure bio submission follows a depth-first tree walk") as that commit caused dm to start using chained bios itself. A particular failure mode is that if a bio spans an 'error' target and a working target, the 'error' fragment will complete instantly and set the ->bi_status, and the other fragment will normally complete a little later, and will clear ->bi_status. The fix is simply to only assign io_error to bio->bi_status when io_error is not zero. Reported-and-tested-by: Milan Broz Cc: stable@vger.kernel.org (v3.14+) Signed-off-by: NeilBrown Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c5522551122f..2ffe7db75acb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -809,7 +809,8 @@ static void dec_pending(struct dm_io *io, int error) } else { /* done with normal IO or empty flush */ trace_block_bio_complete(md->queue, bio, io_error); - bio->bi_error = io_error; + if (io_error) + bio->bi_error = io_error; bio_endio(bio); } } From e41b3b8972ff3d2a2b6233dbd852241988d1356d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: [PATCH 517/705] powerpc: fix build errors in stable tree This is just the first chunk of commit 222f20f140623ef6033491d0103ee0875fe87d35 upstream. to fix a build error in the powerpc tree due to other backports happening (and this full patch not being backported). Reported-by: Guenter Roeck Reported-by: Yves-Alexis Perez Cc: Nicholas Piggin Cc: Michael Ellerman Cc: Yves-Alexis Perez Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c33b69d10919..9121b9a35c8a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -39,6 +39,11 @@ #include #include #include +#ifdef CONFIG_PPC_BOOK3S +#include +#else +#include +#endif /* * System calls. From 5c1c0b9267dd288aa7fa0356590111a89cfd6f90 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 14 Nov 2017 04:34:52 -0800 Subject: [PATCH 518/705] IB/qib: Fix comparison error with qperf compare/swap test commit 87b3524cb5058fdc7c2afdb92bdb2e079661ddc4 upstream. This failure exists with qib: ver_rc_compare_swap: mismatch, sequence 2, expected 123456789abcdef, got 0 The request builder was using the incorrect inlines to build the request header resulting in incorrect data in the atomic header. Fix by using the appropriate inlines to create the request. Fixes: 261a4351844b ("IB/qib,IB/hfi: Use core common header file") Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_rc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index c1523f9a3c12..e4d4f5c44afe 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -443,13 +443,13 @@ no_flow_control: qp->s_state = OP(COMPARE_SWAP); put_ib_ateth_swap(wqe->atomic_wr.swap, &ohdr->u.atomic_eth); - put_ib_ateth_swap(wqe->atomic_wr.compare_add, - &ohdr->u.atomic_eth); + put_ib_ateth_compare(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); put_ib_ateth_swap(wqe->atomic_wr.compare_add, &ohdr->u.atomic_eth); - put_ib_ateth_swap(0, &ohdr->u.atomic_eth); + put_ib_ateth_compare(0, &ohdr->u.atomic_eth); } put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, &ohdr->u.atomic_eth); From ee3d989b3d59dbbb3b5c3eb33434b5ef38b3023e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 12 Jan 2018 07:58:40 +0200 Subject: [PATCH 519/705] IB/mlx4: Fix incorrectly releasing steerable UD QPs when have only ETH ports commit 852f6927594d0d3e8632c889b2ab38cbc46476ad upstream. Allocating steerable UD QPs depends on having at least one IB port, while releasing those QPs does not. As a result, when there are only ETH ports, the IB (RoCE) driver requests releasing a qp range whose base qp is zero, with qp count zero. When SR-IOV is enabled, and the VF driver is running on a VM over a hypervisor which treats such qp release calls as errors (rather than NOPs), we see lines in the VM message log like: mlx4_core 0002:00:02.0: Failed to release qp range base:0 cnt:0 Fix this by adding a check for a zero count in mlx4_release_qp_range() (which thus treats releasing 0 qps as a nop), and eliminating the check for device managed flow steering when releasing steerable UD QPs. (Freeing ib_uc_qpns_bitmap unconditionally is also OK, since it remains NULL when steerable UD QPs are not allocated). Fixes: 4196670be786 ("IB/mlx4: Don't allocate range of steerable UD QPs for Ethernet-only device") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 13 +++++-------- drivers/net/ethernet/mellanox/mlx4/qp.c | 3 +++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8059b7eaf3a8..c41c8d0a4ac0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2928,9 +2928,8 @@ err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) - mlx4_qp_release_range(dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_count); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); err_counter: for (i = 0; i < ibdev->num_ports; ++i) mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); @@ -3035,11 +3034,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->iboe.nb.notifier_call = NULL; } - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { - mlx4_qp_release_range(dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_count); - kfree(ibdev->ib_uc_qpns_bitmap); - } + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); + kfree(ibdev->ib_uc_qpns_bitmap); iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index d1cd9c32a9ae..6143113a7fef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -286,6 +286,9 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) u64 in_param = 0; int err; + if (!cnt) + return; + if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, base_qpn); set_param_h(&in_param, cnt); From 1d808f828f4cc97bd2f7dbafce2e9a33f3d8a86d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Jan 2018 17:26:24 +0100 Subject: [PATCH 520/705] kselftest: fix OOM in memory compaction test commit 4c1baad223906943b595a887305f2e8124821dad upstream. Running the compaction_test sometimes results in out-of-memory failures. When I debugged this, it turned out that the code to reset the number of hugepages to the initial value is simply broken since we write into an open sysctl file descriptor multiple times without seeking back to the start. Adding the lseek here fixes the problem. Cc: stable@vger.kernel.org Reported-by: Naresh Kamboju Link: https://bugs.linaro.org/show_bug.cgi?id=3145 Signed-off-by: Arnd Bergmann Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/compaction_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 6d1437f895b8..298f69e2834c 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -136,6 +136,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + lseek(fd, 0, SEEK_SET); + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); From ae34caee36c6d34322c5afdc54c6a92881a224e4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 9 Jan 2018 11:23:40 -0800 Subject: [PATCH 521/705] RDMA/rxe: Fix a race condition related to the QP error state commit 6f301e06de4cf9ab7303f5acd43e64fcd4aa04be upstream. The following sequence: * Change queue pair state into IB_QPS_ERR. * Post a work request on the queue pair. Triggers the following race condition in the rdma_rxe driver: * rxe_qp_error() triggers an asynchronous call of rxe_completer(), the function that examines the QP send queue. * rxe_post_send() posts a work request on the QP send queue. If rxe_completer() runs prior to rxe_post_send(), it will drain the send queue and the driver will assume no further action is necessary. However, once we post the send to the send queue, because the queue is in error, no send completion will ever happen and the send will get stuck. In order to process the send, we need to make sure that rxe_completer() gets run after a send is posted to a queue pair in an error state. This patch ensures that happens. Signed-off-by: Bart Van Assche Cc: Moni Shoua Cc: # v4.8 Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..59f37f412a7f 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -848,6 +848,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, (queue_count(qp->sq.queue) > 1); rxe_run_task(&qp->req.task, must_sched); + if (unlikely(qp->req.state == QP_STATE_ERROR)) + rxe_run_task(&qp->comp.task, 1); return err; } From 9708d4743f317bd0eb12d76f52fc27e2c59d9b6d Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Fri, 12 Jan 2018 12:43:53 +0530 Subject: [PATCH 522/705] cpufreq: powernv: Dont assume distinct pstate values for nominal and pmin commit 3fa4680b860bf48b437d6a2c039789c4abe202ae upstream. Some OpenPOWER boxes can have same pstate values for nominal and pmin pstates. In these boxes the current code will not initialize 'powernv_pstate_info.min' variable and result in erroneous CPU frequency reporting. This patch fixes this problem. Fixes: 09ca4c9b5958 (cpufreq: powernv: Replacing pstate_id with frequency table index) Reported-by: Alvin Wang Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/powernv-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index a84724eabfb8..6fb3cd24c1b6 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -260,9 +260,9 @@ static int init_powernv_pstates(void) if (id == pstate_max) powernv_pstate_info.max = i; - else if (id == pstate_nominal) + if (id == pstate_nominal) powernv_pstate_info.nominal = i; - else if (id == pstate_min) + if (id == pstate_min) powernv_pstate_info.min = i; } From b3685e8e2b6ddec107f62f03ada6302cfc3a7bce Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 5 Nov 2017 21:27:41 -0800 Subject: [PATCH 523/705] PM / devfreq: Propagate error from devfreq_add_device() commit d1bf2d30728f310f72296b54f0651ecdb09cbb12 upstream. Propagate the error of devfreq_add_device() in devm_devfreq_add_device() rather than statically returning ENOMEM. This makes it slightly faster to pinpoint the cause of a returned error. Fixes: 8cd84092d35e ("PM / devfreq: Add resource-managed function for devfreq device") Cc: stable@vger.kernel.org Acked-by: Chanwoo Choi Signed-off-by: Bjorn Andersson Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index a2449d77af07..9e5674c5a07b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -684,7 +684,7 @@ struct devfreq *devm_devfreq_add_device(struct device *dev, devfreq = devfreq_add_device(dev, profile, governor_name, data); if (IS_ERR(devfreq)) { devres_free(ptr); - return ERR_PTR(-ENOMEM); + return devfreq; } *ptr = devfreq; From 9cb1674008e435d2c0145b352bc66b964d7b37ff Mon Sep 17 00:00:00 2001 From: Gang He Date: Wed, 31 Jan 2018 16:14:48 -0800 Subject: [PATCH 524/705] ocfs2: try a blocking lock before return AOP_TRUNCATED_PAGE commit ff26cc10aec128c3f86b5611fd5f59c71d49c0e3 upstream. If we can't get inode lock immediately in the function ocfs2_inode_lock_with_page() when reading a page, we should not return directly here, since this will lead to a softlockup problem when the kernel is configured with CONFIG_PREEMPT is not set. The method is to get a blocking lock and immediately unlock before returning, this can avoid CPU resource waste due to lots of retries, and benefits fairness in getting lock among multiple nodes, increase efficiency in case modifying the same file frequently from multiple nodes. The softlockup crash (when set /proc/sys/kernel/softlockup_panic to 1) looks like: Kernel panic - not syncing: softlockup: hung tasks CPU: 0 PID: 885 Comm: multi_mmap Tainted: G L 4.12.14-6.1-default #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x5c/0x82 panic+0xd5/0x21e watchdog_timer_fn+0x208/0x210 __hrtimer_run_queues+0xcc/0x200 hrtimer_interrupt+0xa6/0x1f0 smp_apic_timer_interrupt+0x34/0x50 apic_timer_interrupt+0x96/0xa0 RIP: 0010:unlock_page+0x17/0x30 RSP: 0000:ffffaf154080bc88 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 RAX: dead000000000100 RBX: fffff21e009f5300 RCX: 0000000000000004 RDX: dead0000000000ff RSI: 0000000000000202 RDI: fffff21e009f5300 RBP: 0000000000000000 R08: 0000000000000000 R09: ffffaf154080bb00 R10: ffffaf154080bc30 R11: 0000000000000040 R12: ffff993749a39518 R13: 0000000000000000 R14: fffff21e009f5300 R15: fffff21e009f5300 ocfs2_inode_lock_with_page+0x25/0x30 [ocfs2] ocfs2_readpage+0x41/0x2d0 [ocfs2] filemap_fault+0x12b/0x5c0 ocfs2_fault+0x29/0xb0 [ocfs2] __do_fault+0x1a/0xa0 __handle_mm_fault+0xbe8/0x1090 handle_mm_fault+0xaa/0x1f0 __do_page_fault+0x235/0x4b0 trace_do_page_fault+0x3c/0x110 async_page_fault+0x28/0x30 RIP: 0033:0x7fa75ded638e RSP: 002b:00007ffd6657db18 EFLAGS: 00010287 RAX: 000055c7662fb700 RBX: 0000000000000001 RCX: 000055c7662fb700 RDX: 0000000000001770 RSI: 00007fa75e909000 RDI: 000055c7662fb700 RBP: 0000000000000003 R08: 000000000000000e R09: 0000000000000000 R10: 0000000000000483 R11: 00007fa75ded61b0 R12: 00007fa75e90a770 R13: 000000000000000e R14: 0000000000001770 R15: 0000000000000000 About performance improvement, we can see the testing time is reduced, and CPU utilization decreases, the detailed data is as follows. I ran multi_mmap test case in ocfs2-test package in a three nodes cluster. Before applying this patch: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 2754 ocfs2te+ 20 0 170248 6980 4856 D 80.73 0.341 0:18.71 multi_mmap 1505 root rt 0 222236 123060 97224 S 2.658 6.015 0:01.44 corosync 5 root 20 0 0 0 0 S 1.329 0.000 0:00.19 kworker/u8:0 95 root 20 0 0 0 0 S 1.329 0.000 0:00.25 kworker/u8:1 2728 root 20 0 0 0 0 S 0.997 0.000 0:00.24 jbd2/sda1-33 2721 root 20 0 0 0 0 S 0.664 0.000 0:00.07 ocfs2dc-3C8CFD4 2750 ocfs2te+ 20 0 142976 4652 3532 S 0.664 0.227 0:00.28 mpirun ocfs2test@tb-node2:~>multiple_run.sh -i ens3 -k ~/linux-4.4.21-69.tar.gz -o ~/ocfs2mullog -C hacluster -s pcmk -n tb-node2,tb-node1,tb-node3 -d /dev/sda1 -b 4096 -c 32768 -t multi_mmap /mnt/shared Tests with "-b 4096 -C 32768" Thu Dec 28 14:44:52 CST 2017 multi_mmap..................................................Passed. Runtime 783 seconds. After apply this patch: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 2508 ocfs2te+ 20 0 170248 6804 4680 R 54.00 0.333 0:55.37 multi_mmap 155 root 20 0 0 0 0 S 2.667 0.000 0:01.20 kworker/u8:3 95 root 20 0 0 0 0 S 2.000 0.000 0:01.58 kworker/u8:1 2504 ocfs2te+ 20 0 142976 4604 3480 R 1.667 0.225 0:01.65 mpirun 5 root 20 0 0 0 0 S 1.000 0.000 0:01.36 kworker/u8:0 2482 root 20 0 0 0 0 S 1.000 0.000 0:00.86 jbd2/sda1-33 299 root 0 -20 0 0 0 S 0.333 0.000 0:00.13 kworker/2:1H 335 root 0 -20 0 0 0 S 0.333 0.000 0:00.17 kworker/1:1H 535 root 20 0 12140 7268 1456 S 0.333 0.355 0:00.34 haveged 1282 root rt 0 222284 123108 97224 S 0.333 6.017 0:01.33 corosync ocfs2test@tb-node2:~>multiple_run.sh -i ens3 -k ~/linux-4.4.21-69.tar.gz -o ~/ocfs2mullog -C hacluster -s pcmk -n tb-node2,tb-node1,tb-node3 -d /dev/sda1 -b 4096 -c 32768 -t multi_mmap /mnt/shared Tests with "-b 4096 -C 32768" Thu Dec 28 15:04:12 CST 2017 multi_mmap..................................................Passed. Runtime 487 seconds. Link: http://lkml.kernel.org/r/1514447305-30814-1-git-send-email-ghe@suse.com Fixes: 1cce4df04f37 ("ocfs2: do not lock/unlock() inode DLM lock") Signed-off-by: Gang He Reviewed-by: Eric Ren Acked-by: alex chen Acked-by: piaojun Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8dce4099a6ca..785fcc29d85d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2485,6 +2485,15 @@ int ocfs2_inode_lock_with_page(struct inode *inode, ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); if (ret == -EAGAIN) { unlock_page(page); + /* + * If we can't get inode lock immediately, we should not return + * directly here, since this will lead to a softlockup problem. + * The method is to get a blocking lock and immediately unlock + * before returning, this can avoid CPU resource waste due to + * lots of retries, and benefits fairness in getting lock. + */ + if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) + ocfs2_inode_unlock(inode, ex); ret = AOP_TRUNCATED_PAGE; } From 15c5e601b7ccd1c2f6785eee4f69822329a90961 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Mon, 15 Jan 2018 20:38:17 +0100 Subject: [PATCH 525/705] s390: fix handling of -1 in set{,fs}[gu]id16 syscalls commit 6dd0d2d22aa363fec075cb2577ba273ac8462e94 upstream. For some reason, the implementation of some 16-bit ID system calls (namely, setuid16/setgid16 and setfsuid16/setfsgid16) used type cast instead of low2highgid/low2highuid macros for converting [GU]IDs, which led to incorrect handling of value of -1 (which ought to be considered invalid). Discovered by strace test suite. Cc: stable@vger.kernel.org Signed-off-by: Eugene Syromiatnikov Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/compat_linux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index f06a9a0063f1..d0724a924184 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -110,7 +110,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) { - return sys_setgid((gid_t)gid); + return sys_setgid(low2highgid(gid)); } COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) @@ -120,7 +120,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) { - return sys_setuid((uid_t)uid); + return sys_setuid(low2highuid(uid)); } COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) @@ -173,12 +173,12 @@ COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) { - return sys_setfsuid((uid_t)uid); + return sys_setfsuid(low2highuid(uid)); } COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) { - return sys_setfsgid((gid_t)gid); + return sys_setfsgid(low2highgid(gid)); } static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) From 6d0fe11189f37c91a9386b0b862f00167fd65afc Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 30 Nov 2017 21:27:32 -0800 Subject: [PATCH 526/705] arm64: dts: msm8916: Correct ipc references for smsm commit 566bd8902e7fa20bd412ed753e09e89c1c96939c upstream. SMSM is not symmetrical, the incoming bits from WCNSS are available at index 6, but the outgoing host id for WCNSS is 3. Further more, upstream references the base of APCS (in contrast to downstream), so the register offset of 8 must be included. Fixes: 1fb47e0a9ba4 ("arm64: dts: qcom: msm8916: Add smsm and smp2p nodes") Cc: stable@vger.kernel.org Reported-by: Ramon Fried Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 466ca5705c99..ebf62ad76998 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -906,8 +906,8 @@ #address-cells = <1>; #size-cells = <0>; - qcom,ipc-1 = <&apcs 0 13>; - qcom,ipc-6 = <&apcs 0 19>; + qcom,ipc-1 = <&apcs 8 13>; + qcom,ipc-3 = <&apcs 8 19>; apps_smsm: apps@0 { reg = <0>; From 910a2e4931cc08cbb0323d8d24df6c5482a535b2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:22:00 +0100 Subject: [PATCH 527/705] ARM: lpc3250: fix uda1380 gpio numbers commit ca32e0c4bf9ca4b87089c5a5ce945e5f2ec890da upstream. dtc warns about obviously incorrect GPIO numbers for the audio codec on both lpc32xx boards: arch/arm/boot/dts/lpc3250-phy3250.dtb: Warning (gpios_property): reset-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 arch/arm/boot/dts/lpc3250-phy3250.dtb: Warning (gpios_property): power-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 arch/arm/boot/dts/lpc3250-ea3250.dtb: Warning (gpios_property): reset-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 arch/arm/boot/dts/lpc3250-ea3250.dtb: Warning (gpios_property): power-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 It looks like the nodes are written for a different binding that combines the GPIO number into a single number rather than a bank/number pair. I found the right numbers on stackexchange.com, so this patch fixes the warning and has a reasonable chance of getting things to actually work. Cc: stable@vger.kernel.org Link: https://unix.stackexchange.com/questions/59497/alsa-asoc-how-to-correctly-load-devices-drivers/62217#62217 Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/lpc3250-ea3250.dts | 4 ++-- arch/arm/boot/dts/lpc3250-phy3250.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/lpc3250-ea3250.dts b/arch/arm/boot/dts/lpc3250-ea3250.dts index 52b3ed10283a..e2bc731079be 100644 --- a/arch/arm/boot/dts/lpc3250-ea3250.dts +++ b/arch/arm/boot/dts/lpc3250-ea3250.dts @@ -156,8 +156,8 @@ uda1380: uda1380@18 { compatible = "nxp,uda1380"; reg = <0x18>; - power-gpio = <&gpio 0x59 0>; - reset-gpio = <&gpio 0x51 0>; + power-gpio = <&gpio 3 10 0>; + reset-gpio = <&gpio 3 2 0>; dac-clk = "wspll"; }; diff --git a/arch/arm/boot/dts/lpc3250-phy3250.dts b/arch/arm/boot/dts/lpc3250-phy3250.dts index fd95e2b10357..b7bd3a110a8d 100644 --- a/arch/arm/boot/dts/lpc3250-phy3250.dts +++ b/arch/arm/boot/dts/lpc3250-phy3250.dts @@ -81,8 +81,8 @@ uda1380: uda1380@18 { compatible = "nxp,uda1380"; reg = <0x18>; - power-gpio = <&gpio 0x59 0>; - reset-gpio = <&gpio 0x51 0>; + power-gpio = <&gpio 3 10 0>; + reset-gpio = <&gpio 3 2 0>; dac-clk = "wspll"; }; From 34cad5572f162d39b7dfcabcb3a6ae28bf3dac9d Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Wed, 10 Jan 2018 09:21:02 +0100 Subject: [PATCH 528/705] ARM: dts: STi: Add gpio polarity for "hdmi,hpd-gpio" property commit 7ac1f59c09a61e6af6622df6809e003b0af07f3d upstream. The GPIO polarity is missing in the hdmi,hpd-gpio property, this fixes the following DT warnings: arch/arm/boot/dts/stih410-b2120.dtb: Warning (gpios_property): hdmi,hpd-gpio property size (8) too small for cell size 2 in /soc/sti-display-subsystem/sti-hdmi@8d04000 arch/arm/boot/dts/stih407-b2120.dtb: Warning (gpios_property): hdmi,hpd-gpio property size (8) too small for cell size 2 in /soc/sti-display-subsystem/sti-hdmi@8d04000 arch/arm/boot/dts/stih410-b2260.dtb: Warning (gpios_property): hdmi,hpd-gpio property size (8) too small for cell size 2 in /soc/sti-display-subsystem/sti-hdmi@8d04000 [arnd: marked Cc:stable since this warning shows up with the latest dtc by default, and is more likely to actually cause problems than the other patches from this series] Cc: stable@vger.kernel.org Signed-off-by: Patrice Chotard Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/stih407.dtsi | 3 ++- arch/arm/boot/dts/stih410.dtsi | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stih407.dtsi b/arch/arm/boot/dts/stih407.dtsi index 291ffacbd2e0..fe043d313ccd 100644 --- a/arch/arm/boot/dts/stih407.dtsi +++ b/arch/arm/boot/dts/stih407.dtsi @@ -8,6 +8,7 @@ */ #include "stih407-clock.dtsi" #include "stih407-family.dtsi" +#include / { soc { sti-display-subsystem { @@ -122,7 +123,7 @@ <&clk_s_d2_quadfs 0>, <&clk_s_d2_quadfs 1>; - hdmi,hpd-gpio = <&pio5 3>; + hdmi,hpd-gpio = <&pio5 3 GPIO_ACTIVE_LOW>; reset-names = "hdmi"; resets = <&softreset STIH407_HDMI_TX_PHY_SOFTRESET>; ddc = <&hdmiddc>; diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi index 4d329b2908be..3c118fc2bf61 100644 --- a/arch/arm/boot/dts/stih410.dtsi +++ b/arch/arm/boot/dts/stih410.dtsi @@ -9,6 +9,7 @@ #include "stih410-clock.dtsi" #include "stih407-family.dtsi" #include "stih410-pinctrl.dtsi" +#include / { aliases { bdisp0 = &bdisp0; @@ -213,7 +214,7 @@ <&clk_s_d2_quadfs 0>, <&clk_s_d2_quadfs 1>; - hdmi,hpd-gpio = <&pio5 3>; + hdmi,hpd-gpio = <&pio5 3 GPIO_ACTIVE_LOW>; reset-names = "hdmi"; resets = <&softreset STIH407_HDMI_TX_PHY_SOFTRESET>; ddc = <&hdmiddc>; From 56d435814c9653346be9dffd3d99965992eb25a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 17:37:08 +0100 Subject: [PATCH 529/705] ARM: dts: nomadik: add interrupt-parent for clcd commit e8bfa0422469cdfc86be3f525f621b1d44d2481b upstream. The clcd device is lacking an interrupt-parent property, which makes the interrupt unusable and shows up as a warning with the latest dtc version: arch/arm/boot/dts/ste-nomadik-s8815.dtb: Warning (interrupts_property): Missing interrupt-parent for /amba/clcd@10120000 arch/arm/boot/dts/ste-nomadik-nhk15.dtb: Warning (interrupts_property): Missing interrupt-parent for /amba/clcd@10120000 I looked up the old board files and found that this interrupt has the same irqchip as all the other on-chip device, it just needs one extra line. Fixes: 17470b7da11c ("ARM: dts: add the CLCD LCD display to the NHK15") Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index adb1c0998b81..1077ceebb2d6 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -749,6 +749,7 @@ reg = <0x10120000 0x1000>; interrupt-names = "combined"; interrupts = <14>; + interrupt-parent = <&vica>; clocks = <&clcdclk>, <&hclkclcd>; clock-names = "clcdclk", "apb_pclk"; status = "disabled"; From 65eacaf594d32d2a60003df23c17b8e24fbeee58 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Jan 2018 11:28:51 +0530 Subject: [PATCH 530/705] arm: spear600: Add missing interrupt-parent of rtc commit 6ffb5b4f248fe53e0361b8cbc2a523b432566442 upstream. The interrupt-parent of rtc was missing, add it. Fixes: 8113ba917dfa ("ARM: SPEAr: DT: Update device nodes") Cc: stable@vger.kernel.org # v3.8+ Reported-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/spear600.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi index 9f60a7b6a42b..bd379034993c 100644 --- a/arch/arm/boot/dts/spear600.dtsi +++ b/arch/arm/boot/dts/spear600.dtsi @@ -194,6 +194,7 @@ rtc@fc900000 { compatible = "st,spear600-rtc"; reg = <0xfc900000 0x1000>; + interrupt-parent = <&vic0>; interrupts = <10>; status = "disabled"; }; From e55af9d39b6f649a815080005ceb766e5acf08f4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Jan 2018 11:28:52 +0530 Subject: [PATCH 531/705] arm: spear13xx: Fix dmas cells commit cdd10409914184c7eee5ae3e11beb890c9c16c61 upstream. The "dmas" cells for the designware DMA controller need to have only 3 properties apart from the phandle: request line, src master and destination master. But the commit 6e8887f60f60 updated it incorrectly while moving from platform code to DT. Fix it. Cc: stable@vger.kernel.org # v3.10+ Fixes: 6e8887f60f60 ("ARM: SPEAr13xx: Pass generic DW DMAC platform data from DT") Reported-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/dma/snps-dma.txt | 2 +- arch/arm/boot/dts/spear1340.dtsi | 4 ++-- arch/arm/boot/dts/spear13xx.dtsi | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index 0f5583293c9c..633481e2a4ec 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -63,6 +63,6 @@ Example: interrupts = <0 35 0x4>; status = "disabled"; dmas = <&dmahost 12 0 1>, - <&dmahost 13 0 1 0>; + <&dmahost 13 1 0>; dma-names = "rx", "rx"; }; diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index df2232d767ed..6361cbfcbe5e 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -141,8 +141,8 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 0x600 0 0 1>, /* 0xC << 11 */ - <&dwdma0 0x680 0 1 0>; /* 0xD << 7 */ + dmas = <&dwdma0 12 0 1>, + <&dwdma0 13 1 0>; dma-names = "tx", "rx"; }; diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 449acf0d8272..9564337c1815 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -100,7 +100,7 @@ reg = <0xb2800000 0x1000>; interrupts = <0 29 0x4>; status = "disabled"; - dmas = <&dwdma0 0 0 0 0>; + dmas = <&dwdma0 0 0 0>; dma-names = "data"; }; @@ -288,8 +288,8 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 0x2000 0 0 0>, /* 0x4 << 11 */ - <&dwdma0 0x0280 0 0 0>; /* 0x5 << 7 */ + dmas = <&dwdma0 4 0 0>, + <&dwdma0 5 0 0>; dma-names = "tx", "rx"; }; From c1a5f89b138321e3645af2603efaef1197125660 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Jan 2018 11:28:53 +0530 Subject: [PATCH 532/705] arm: spear13xx: Fix spics gpio controller's warning commit f8975cb1b8a36d0839b6365235778dd9df1d04ca upstream. This fixes the following warning by also sending the flags argument for gpio controllers: Property 'cs-gpios', cell 6 is not a phandle reference in /ahb/apb/spi@e0100000 Fixes: 8113ba917dfa ("ARM: SPEAr: DT: Update device nodes") Cc: stable@vger.kernel.org # v3.8+ Reported-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/spear1310-evb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts index 84101e4eebbf..0f5f379323a8 100644 --- a/arch/arm/boot/dts/spear1310-evb.dts +++ b/arch/arm/boot/dts/spear1310-evb.dts @@ -349,7 +349,7 @@ spi0: spi@e0100000 { status = "okay"; num-cs = <3>; - cs-gpios = <&gpio1 7 0>, <&spics 0>, <&spics 1>; + cs-gpios = <&gpio1 7 0>, <&spics 0 0>, <&spics 1 0>; stmpe610@0 { compatible = "st,stmpe610"; From 7b559f7f08a835ff0e720afb67c18db3e1bf1b86 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Feb 2018 17:18:17 -0800 Subject: [PATCH 533/705] x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface commit 6b8cf5cc9965673951f1ab3f0e3cf23d06e3e2ee upstream. At entry userspace may have populated registers with values that could otherwise be useful in a speculative execution attack. Clear them to minimize the kernel's attack surface. Originally-From: Andi Kleen Signed-off-by: Dan Williams Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151787989697.7847.4083702787288600552.stgit@dwillia2-desk3.amr.corp.intel.com [ Made small improvements to the changelog. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64_compat.S | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d76a97653980..92c55738d543 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -83,15 +83,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorq %r12, %r12 /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorq %r13, %r13 /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorq %r14, %r14 /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorq %r15, %r15 /* nospec r15 */ cld /* @@ -209,15 +219,25 @@ ENTRY(entry_SYSCALL_compat) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorq %r12, %r12 /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorq %r13, %r13 /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorq %r14, %r14 /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorq %r15, %r15 /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -320,15 +340,25 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15 */ cld /* From 3740b9f09a94660636e33bb1aa577ad8702ca7b6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Feb 2018 11:21:58 +0100 Subject: [PATCH 534/705] compiler-gcc.h: Introduce __optimize function attribute commit df5d45aa08f848b79caf395211b222790534ccc7 upstream. Create a new function attribute __optimize, which allows to specify an optimization level on a per-function basis. Signed-off-by: Geert Uytterhoeven Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 4 ++++ include/linux/compiler.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 928e5ca0caee..eb0ed31193a3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -187,6 +187,10 @@ #endif /* __CHECKER__ */ #endif /* GCC_VERSION >= 40300 */ +#if GCC_VERSION >= 40400 +#define __optimize(level) __attribute__((__optimize__(level))) +#endif /* GCC_VERSION >= 40400 */ + #if GCC_VERSION >= 40500 #ifndef __CHECKER__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index cf0fa5d86059..5ce911db7d88 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -469,6 +469,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #endif +#ifndef __optimize +# define __optimize(level) +#endif + /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 From 70f822be66083155a96c157825907dfeffc621cd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 10 Feb 2018 23:39:22 +0000 Subject: [PATCH 535/705] x86/speculation: Update Speculation Control microcode blacklist commit 1751342095f0d2b36fa8114d8e12c5688c455ac4 upstream. Intel have retroactively blessed the 0xc2 microcode on Skylake mobile and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine too. We blacklisted the latter purely because it was present with all the other problematic ones in the 2018-01-08 release, but now it's explicitly listed as OK. We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as that appeared in one version of the blacklist and then reverted to 0x80 again. We can change it if 0x84 is actually announced to be safe. Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4097b43cba2d..e3b00ac1458d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -82,8 +82,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, @@ -95,8 +93,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, - /* Updated in the 20180108 release; blacklist until we know otherwise */ - { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, /* Observed in the wild */ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, From 765b60870ae6d2c51ad985b62fcbc60e0c62c824 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Feb 2018 15:27:34 +0000 Subject: [PATCH 536/705] x86/speculation: Correct Speculation Control microcode blacklist again commit d37fc6d360a404b208547ba112e7dabb6533c7fc upstream. Arjan points out that the Intel document only clears the 0xc2 microcode on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3). For the Skylake H/S platform it's OK but for Skylake E3 which has the same CPUID it isn't (yet) cleared. So removing it from the blacklist was premature. Put it back for now. Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was featured in one of the early revisions of the Intel document was never released to the public, and won't be until/unless it is also validated as safe. So those can change to 0x80 which is what all *other* versions of the doc have identified. Once the retrospective testing of existing public microcodes is done, we should be back into a mode where new microcodes are only released in batches and we shouldn't even need to update the blacklist for those anyway, so this tweaking of the list isn't expected to be a thing which keeps happening. Requested-by: Arjan van de Ven Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e3b00ac1458d..02cb2e3ddb60 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -75,13 +75,14 @@ struct sku_microcode { u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, From 7b9dd0d16070cd5f38f4ff9dec7b8651e78f3a0e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 10 Feb 2018 23:39:24 +0000 Subject: [PATCH 537/705] KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 928a4c39484281f8ca366f53a1db79330d058401 upstream. With retpoline, tight loops of "call this function for every XXX" are very much pessimised by taking a prediction miss *every* time. This one is by far the biggest contributor to the guest launch time with retpoline. By marking the iterator slot_handle_…() functions always_inline, we can ensure that the indirect function call can be optimised away into a direct call and it actually generates slightly smaller code because some of the other conditionals can get optimised away too. Performance is now pretty close to what we see with nospectre_v2 on the command line. Suggested-by: Linus Torvalds Tested-by: Filippo Sironi Signed-off-by: David Woodhouse Reviewed-by: Filippo Sironi Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0a324e120942..a16c06604a56 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4640,7 +4640,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ -static bool +static __always_inline bool slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) @@ -4670,7 +4670,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, return flush; } -static bool +static __always_inline bool slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, bool lock_flush_tlb) @@ -4681,7 +4681,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -4689,7 +4689,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -4697,7 +4697,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { From 96652962e667387964b5e997811043dd1e698e84 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 10 Feb 2018 23:39:25 +0000 Subject: [PATCH 538/705] X86/nVMX: Properly set spec_ctrl and pred_cmd before merging MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 206587a9fb764d71f035dc7f6d3b6488f5d5b304 upstream. These two variables should check whether SPEC_CTRL and PRED_CMD are supposed to be passed through to L2 guests or not. While msr_write_intercepted_l01 would return 'true' if it is not passed through. So just invert the result of msr_write_intercepted_l01 to implement the correct semantics. Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Reviewed-by: Jim Mattson Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: kvm@vger.kernel.org Cc: sironi@amazon.de Fixes: 086e7d4118cc ("KVM: VMX: Allow direct access to MSR_IA32_SPEC_CTRL") Link: http://lkml.kernel.org/r/1518305967-31356-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d66224e695cf..1e16821c1378 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9606,8 +9606,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, * updated to reflect this when L1 (or its L2s) actually write to * the MSR. */ - bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && !pred_cmd && !spec_ctrl) From 3aad6fe914e49d71e725e4b231f082593cf38260 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2018 09:03:08 +0100 Subject: [PATCH 539/705] x86/speculation: Clean up various Spectre related details commit 21e433bdb95bdf3aa48226fd3d33af608437f293 upstream. Harmonize all the Spectre messages so that a: dmesg | grep -i spectre ... gives us most Spectre related kernel boot messages. Also fix a few other details: - clarify a comment about firmware speculation control - s/KPTI/PTI - remove various line-breaks that made the code uglier Acked-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 957ad443b786..b83e0c9107bc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -161,8 +161,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); if (ret < 0) return SPECTRE_V2_CMD_AUTO; @@ -174,8 +173,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } } @@ -184,8 +182,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -255,14 +252,14 @@ static void __init spectre_v2_select_mitigation(void) goto retpoline_auto; break; } - pr_err("kernel not compiled with retpoline; no mitigation available!"); + pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); return; retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { retpoline_amd: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : @@ -280,7 +277,7 @@ retpoline_auto: pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP or KPTI are available, there is a risk of + * If neither SMEP nor PTI are available, there is a risk of * hitting userspace addresses in the RSB after a context switch * from a shallow call stack to a deeper one. To prevent this fill * the entire RSB, even when using IBRS. @@ -294,21 +291,20 @@ retpoline_auto: if ((!boot_cpu_has(X86_FEATURE_KAISER) && !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Filling RSB on context switch\n"); + pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); } /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Enabling Indirect Branch Prediction Barrier\n"); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } } #undef pr_fmt #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return sprintf(buf, "Not affected\n"); @@ -317,16 +313,14 @@ ssize_t cpu_show_meltdown(struct device *dev, return sprintf(buf, "Vulnerable\n"); } -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); From 60d7b9c7961bc2acc95280212b981f0e5686d586 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2018 08:26:17 +0100 Subject: [PATCH 540/705] selftests/x86/pkeys: Remove unused functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ce676638fe7b284132a7d7d5e7e7ad81bab9947e upstream. This also gets rid of two build warnings: protection_keys.c: In function ‘dumpit’: protection_keys.c:419:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] write(1, buf, nr_read); ^~~~~~~~~~~~~~~~~~~~~~ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Dave Hansen Cc: Shuah Khan Cc: Andy Lutomirski Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index bdd58c78902e..2842a5fa22b3 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -381,34 +381,6 @@ pid_t fork_lazy_child(void) return forkret; } -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 From 0472707cf4111fee248b212dd50691ad52427f45 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 12:10:11 +0100 Subject: [PATCH 541/705] selftests/x86: Do not rely on "int $0x80" in test_mremap_vdso.c commit 2cbc0d66de0480449c75636f55697c7ff3af61fc upstream. On 64-bit builds, we should not rely on "int $0x80" working (it only does if CONFIG_IA32_EMULATION=y is enabled). Without this patch, the move test may succeed, but the "int $0x80" causes a segfault, resulting in a false negative output of this self-test. Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/test_mremap_vdso.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index bf0d687c7db7..64f11c8d9b76 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp) vdso_size += PAGE_SIZE; } +#ifdef __i386__ /* Glibc is likely to explode now - exit with raw syscall */ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); +#else /* __x86_64__ */ + syscall(SYS_exit, ret); +#endif } else { int status; From 191752d5d3d0a670842e6fb35291c98f8f818b66 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Feb 2018 09:13:21 +0100 Subject: [PATCH 542/705] selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c commit 4105c69703cdeba76f384b901712c9397b04e9c2 upstream. On 64-bit builds, we should not rely on "int $0x80" working (it only does if CONFIG_IA32_EMULATION=y is enabled). To keep the "Set TF and check int80" test running on 64-bit installs with CONFIG_IA32_EMULATION=y enabled, build this test only if we can also build 32-bit binaries (which should be a good approximation for that). Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/Makefile | 2 ++ tools/testing/selftests/x86/single_step_syscall.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 4af37bfe4aea..6eb50152baf0 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -26,11 +26,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) +EXTRA_CFLAGS += -DCAN_BUILD_32 endif ifeq ($(CAN_BUILD_X86_64),1) all: all_64 TEST_PROGS += $(BINARIES_64) +EXTRA_CFLAGS += -DCAN_BUILD_64 endif all_32: $(BINARIES_32) diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index a48da95c18fd..ddfdd635de16 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -119,7 +119,9 @@ static void check_result(void) int main() { +#ifdef CAN_BUILD_32 int tmp; +#endif sethandler(SIGTRAP, sigtrap, 0); @@ -139,12 +141,13 @@ int main() : : "c" (post_nop) : "r11"); check_result(); #endif - +#ifdef CAN_BUILD_32 printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) : INT80_CLOBBERS); check_result(); +#endif /* * This test is particularly interesting if fast syscalls use From be1ea502e31a47e3afd1c82964f35c78c80d271e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Feb 2018 18:22:40 -0800 Subject: [PATCH 543/705] x86/speculation: Fix up array_index_nospec_mask() asm constraint commit be3233fbfcb8f5acb6e3bcd0895c3ef9e100d470 upstream. Allow the compiler to handle @size as an immediate value or memory directly rather than allocating a register. Reported-by: Linus Torvalds Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151797010204.1289.1510000292250184993.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 857590390397..78d1c6a3d221 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -39,7 +39,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, asm ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) - :"r"(size),"r" (index) + :"g"(size),"r" (index) :"cc"); return mask; } From 297f8eafece7c4fcabe60b7fc445d497541487e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 14:16:06 +0000 Subject: [PATCH 544/705] nospec: Move array_index_nospec() parameter checking into separate macro commit 8fa80c503b484ddc1abbd10c7cb2ab81f3824a50 upstream. For architectures providing their own implementation of array_index_mask_nospec() in asm/barrier.h, attempting to use WARN_ONCE() to complain about out-of-range parameters using WARN_ON() results in a mess of mutually-dependent include files. Rather than unpick the dependencies, simply have the core code in nospec.h perform the checking for us. Signed-off-by: Will Deacon Acked-by: Thomas Gleixner Cc: Dan Williams Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1517840166-15399-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index b99bced39ac2..fbc98e2c8228 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -19,20 +19,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, unsigned long size) { - /* - * Warn developers about inappropriate array_index_nospec() usage. - * - * Even if the CPU speculates past the WARN_ONCE branch, the - * sign bit of @index is taken into account when generating the - * mask. - * - * This warning is compiled out when the compiler can infer that - * @index and @size are less than LONG_MAX. - */ - if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, - "array_index_nospec() limited to range of [0, LONG_MAX]\n")) - return 0; - /* * Always calculate and emit the mask even if the compiler * thinks the mask is not needed. The compiler does not take @@ -43,6 +29,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, } #endif +/* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ +#define array_index_mask_nospec_check(index, size) \ +({ \ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ + _mask = 0; \ + else \ + _mask = array_index_mask_nospec(index, size); \ + _mask; \ +}) + /* * array_index_nospec - sanitize an array index after a bounds check * @@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, ({ \ typeof(index) _i = (index); \ typeof(size) _s = (size); \ - unsigned long _mask = array_index_mask_nospec(_i, _s); \ + unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ \ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ From 90ca269463c55e99e3e91c667a821c6303f207ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 13 Feb 2018 14:28:19 +0100 Subject: [PATCH 545/705] x86/speculation: Add dependency commit ea00f301285ea2f07393678cd2b6057878320c9d upstream. Joe Konno reported a compile failure resulting from using an MSR without inclusion of , and while the current code builds fine (by accident) this needs fixing for future patches. Reported-by: Joe Konno Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan@linux.intel.com Cc: bp@alien8.de Cc: dan.j.williams@intel.com Cc: dave.hansen@linux.intel.com Cc: dwmw2@infradead.org Cc: dwmw@amazon.co.uk Cc: gregkh@linuxfoundation.org Cc: hpa@zytor.com Cc: jpoimboe@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: luto@kernel.org Fixes: 20ffa1caecca ("x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support") Link: http://lkml.kernel.org/r/20180213132819.GJ25201@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 300cc159b4a0..76b058533e47 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef __ASSEMBLY__ From b0809f54222508397a240e83f52ae165b5f21a73 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Mon, 18 Dec 2017 16:34:10 +0800 Subject: [PATCH 546/705] selftests/x86/mpx: Fix incorrect bounds with old _sigfault commit 961888b1d76d84efc66a8f5604b06ac12ac2f978 upstream. For distributions with old userspace header files, the _sigfault structure is different. mpx-mini-test fails with the following error: [root@Purley]# mpx-mini-test_64 tabletest XSAVE is supported by HW & OS XSAVE processor supported state mask: 0x2ff XSAVE OS supported state mask: 0x2ff BNDREGS: size: 64 user: 1 supervisor: 0 aligned: 0 BNDCSR: size: 64 user: 1 supervisor: 0 aligned: 0 starting mpx bounds table test ERROR: siginfo bounds do not match shadow bounds for register 0 Fix it by using the correct offset of _lower/_upper in _sigfault. RHEL needs this patch to work. Signed-off-by: Rui Wang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@linux.intel.com Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test") Link: http://lkml.kernel.org/r/1513586050-1641-1-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/mpx-mini-test.c | 32 +++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 616ee9673339..79e1d13d1cda 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si) return si->si_upper; } #else + +/* + * This deals with old version of _sigfault in some distros: + * + +old _sigfault: + struct { + void *si_addr; + } _sigfault; + +new _sigfault: + struct { + void __user *_addr; + int _trapno; + short _addr_lsb; + union { + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + __u32 _pkey; + }; + } _sigfault; + * + */ + static inline void **__si_bounds_hack(siginfo_t *si) { void *sigfault = &si->_sifields._sigfault; void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); - void **__si_lower = end_sigfault; + int *trapno = (int*)end_sigfault; + /* skip _trapno and _addr_lsb */ + void **__si_lower = (void**)(trapno + 2); return __si_lower; } @@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si) static inline void *__si_bounds_upper(siginfo_t *si) { - return (*__si_bounds_hack(si)) + sizeof(void *); + return *(__si_bounds_hack(si) + 1); } #endif From 06be007aa436aae3200ca31fc13fb66568e4ac4b Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Mon, 1 Jan 2018 09:52:10 +0800 Subject: [PATCH 547/705] x86/cpu: Rename cpu_data.x86_mask to cpu_data.x86_stepping commit b399151cb48db30ad1e0e93dd40d68c6d007b637 upstream. x86_mask is a confusing name which is hard to associate with the processor's stepping. Additionally, correct an indent issue in lib/cpu.c. Signed-off-by: Jia Zhang [ Updated it to more recent kernels. ] Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1514771530-70829-1-git-send-email-qianyue.zj@alibaba-inc.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/lbr.c | 2 +- arch/x86/events/intel/p6.c | 2 +- arch/x86/include/asm/acpi.h | 2 +- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/amd_nb.c | 2 +- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/cpu/amd.c | 26 +++++++++++++------------- arch/x86/kernel/cpu/centaur.c | 4 ++-- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/kernel/cpu/cyrix.c | 2 +- arch/x86/kernel/cpu/intel.c | 18 +++++++++--------- arch/x86/kernel/cpu/microcode/intel.c | 4 ++-- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- arch/x86/kernel/cpu/proc.c | 4 ++-- arch/x86/kernel/head_32.S | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/lib/cpu.c | 2 +- drivers/char/hw_random/via-rng.c | 2 +- drivers/cpufreq/acpi-cpufreq.c | 2 +- drivers/cpufreq/longhaul.c | 6 +++--- drivers/cpufreq/p4-clockmod.c | 2 +- drivers/cpufreq/powernow-k7.c | 2 +- drivers/cpufreq/speedstep-centrino.c | 4 ++-- drivers/cpufreq/speedstep-lib.c | 6 +++--- drivers/crypto/padlock-aes.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/edac/mce_amd.c | 2 +- drivers/hwmon/coretemp.c | 6 +++--- drivers/hwmon/hwmon-vid.c | 2 +- drivers/hwmon/k10temp.c | 2 +- drivers/hwmon/k8temp.c | 2 +- drivers/video/fbdev/geode/video_gx.c | 2 +- 34 files changed, 69 insertions(+), 69 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f0f197f459b5..0bd0c1cc3228 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3363,7 +3363,7 @@ static int intel_snb_pebs_broken(int cpu) break; case INTEL_FAM6_SANDYBRIDGE_X: - switch (cpu_data(cpu).x86_mask) { + switch (cpu_data(cpu).x86_stepping) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index f924629836a8..5d103a87e984 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1131,7 +1131,7 @@ void __init intel_pmu_lbr_init_atom(void) * on PMU interrupt */ if (boot_cpu_data.x86_model == 28 - && boot_cpu_data.x86_mask < 10) { + && boot_cpu_data.x86_stepping < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index 1f5c47ab4c65..c5e441baccc7 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -233,7 +233,7 @@ static __initconst const struct x86_pmu p6_pmu = { static __init void p6_pmu_rdpmc_quirk(void) { - if (boot_cpu_data.x86_mask < 9) { + if (boot_cpu_data.x86_stepping < 9) { /* * PPro erratum 26; fixed in stepping 9 and above. */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 5391b0ae7cc3..d32bab65de70 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -92,7 +92,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) if (boot_cpu_data.x86 == 0x0F && boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86_model <= 0x05 && - boot_cpu_data.x86_mask < 0x0A) + boot_cpu_data.x86_stepping < 0x0A) return 1; else if (amd_e400_c1e_detected) return 1; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cb866ae1bc5d..360a1134a748 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -88,7 +88,7 @@ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; - __u8 x86_mask; + __u8 x86_stepping; #ifdef CONFIG_X86_32 char wp_works_ok; /* It doesn't on 386's */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4fdf6230d93c..8462e2d4ed94 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -105,7 +105,7 @@ int amd_cache_northbridges(void) if (boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model >= 0x8 && (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) + boot_cpu_data.x86_stepping >= 0x1)) amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; if (boot_cpu_data.x86 == 0x15) diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 880aa093268d..36ebb6de1a03 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -20,7 +20,7 @@ void foo(void) OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1b89f0c4251e..c375bc672f82 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -118,7 +118,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if (c->x86_model == 6 && c->x86_mask == 1) { + if (c->x86_model == 6 && c->x86_stepping == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); @@ -147,7 +147,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_stepping < 8)) { /* We can only write allocate on the low 508Mb */ if (mbytes > 508) mbytes = 508; @@ -166,7 +166,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if ((c->x86_model == 8 && c->x86_mask > 7) || + if ((c->x86_model == 8 && c->x86_stepping > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ @@ -219,7 +219,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx * As per AMD technical note 27212 0.2 */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", @@ -239,12 +239,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * but they are not certified as MP capable. */ /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) + if ((c->x86_model == 6) && ((c->x86_stepping == 0) || + (c->x86_stepping == 1))) return; /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) + if ((c->x86_model == 7) && (c->x86_stepping == 0)) return; /* @@ -254,8 +254,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for * more. */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || + if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || + ((c->x86_model == 7) && (c->x86_stepping >= 1)) || (c->x86_model > 7)) if (cpu_has(c, X86_FEATURE_MP)) return; @@ -569,7 +569,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* Set MTRR capability flag if appropriate */ if (c->x86 == 5) if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) + (c->x86_model == 8 && c->x86_stepping >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) @@ -834,11 +834,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) + if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) + (c->x86_stepping == 0 || c->x86_stepping == 1)) size = 256; } return size; @@ -975,7 +975,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_stepping; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 1661d8ec9280..4d2f61f92fed 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -134,7 +134,7 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: - switch (c->x86_mask) { + switch (c->x86_stepping) { default: name = "2"; break; @@ -209,7 +209,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) * - Note, it seems this may only be in engineering samples. */ if ((c->x86 == 6) && (c->x86_model == 9) && - (c->x86_mask == 1) && (size == 65)) + (c->x86_stepping == 1) && (size == 65)) size -= 1; return size; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 08e89ed6aa87..96b2c83e42c6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -699,7 +699,7 @@ void cpu_detect(struct cpuinfo_x86 *c) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = x86_family(tfms); c->x86_model = x86_model(tfms); - c->x86_mask = x86_stepping(tfms); + c->x86_stepping = x86_stepping(tfms); if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -1146,7 +1146,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; @@ -1391,8 +1391,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); - if (c->x86_mask || c->cpuid_level >= 0) - pr_cont(", stepping: 0x%x)\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + pr_cont(", stepping: 0x%x)\n", c->x86_stepping); else pr_cont(")\n"); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index bd9dcd6b712d..455d8ada9b9a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -212,7 +212,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* common case step number/rev -- exceptions handled below */ c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; + c->x86_stepping = dir1 & 0xf; /* Now cook; the original recipe is by Channing Corn, from Cyrix. * We do the same thing for each generation: we work out diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 02cb2e3ddb60..6ed206bd9071 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,7 +105,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && - c->x86_mask == spectre_bad_microcodes[i].stepping) + c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } return false; @@ -158,7 +158,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && + if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -174,7 +174,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -289,7 +289,7 @@ int ppro_with_ram_bug(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { + boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } @@ -306,7 +306,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) * Mask B, Pentium, but not Pentium MMX */ if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_stepping >= 1 && c->x86_stepping <= 4 && c->x86_model <= 3) { /* * Remember we have B step Pentia with bugs @@ -349,7 +349,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -367,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); @@ -382,7 +382,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Specification Update"). */ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -601,7 +601,7 @@ static void init_intel(struct cpuinfo_x86 *c) case 6: if (l2 == 128) p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) + else if (c->x86_stepping == 0 || c->x86_stepping == 5) p = "Celeron-A"; break; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f90f17610f62..19af84ed1819 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1062,7 +1062,7 @@ static bool is_blacklisted(unsigned int cpu) */ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && - c->x86_mask == 0x01 && + c->x86_stepping == 0x01 && llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); @@ -1085,7 +1085,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return UCODE_NFOUND; sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); + c->x86, c->x86_model, c->x86_stepping); if (request_firmware_direct(&firmware, name, device)) { pr_debug("data file %s load failed\n", name); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fdc55215d44d..e12ee86906c6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { + boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 24e87e74990d..fae740c22657 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -699,8 +699,8 @@ void __init mtrr_bp_init(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) + (boot_cpu_data.x86_stepping == 0x3 || + boot_cpu_data.x86_stepping == 0x4)) phys_addr = 36; size_or_mask = SIZE_OR_MASK_BITS(phys_addr); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 18ca99f2798b..9e817f2a5079 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -70,8 +70,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) c->x86_model, c->x86_model_id[0] ? c->x86_model_id : "unknown"); - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_stepping); else seq_puts(m, "stepping\t: unknown\n"); if (c->microcode) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 2dabea46f039..82155d0cc310 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -35,7 +35,7 @@ #define X86 new_cpu_data+CPUINFO_x86 #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor #define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask +#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability @@ -441,7 +441,7 @@ enable_paging: shrb $4,%al movb %al,X86_MODEL andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK + movb %cl,X86_STEPPING movl %edx,X86_CAPABILITY is486: diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0f8d20497383..d0fb941330c6 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -406,7 +406,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index d6f848d1211d..2dd1fe13a37b 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig) { unsigned int fam, model; - fam = x86_family(sig); + fam = x86_family(sig); model = (sig >> 4) & 0xf; diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 44ce80606944..e278125ddf41 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -166,7 +166,7 @@ static int via_rng_init(struct hwrng *rng) /* Enable secondary noise source on CPUs where it is present. */ /* Nehemiah stepping 8 and higher */ - if ((c->x86_model == 9) && (c->x86_mask > 7)) + if ((c->x86_model == 9) && (c->x86_stepping > 7)) lo |= VIA_NOISESRC2; /* Esther */ diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 297e9128fe9f..1ee3674a99bb 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -648,7 +648,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8)) { + (c->x86_stepping == 8)) { pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n"); return -ENODEV; } diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c46a12df40dd..d5e27bc7585a 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) break; case 7: - switch (c->x86_mask) { + switch (c->x86_stepping) { case 0: longhaul_version = TYPE_LONGHAUL_V1; cpu_model = CPU_SAMUEL2; @@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) break; case 1 ... 15: longhaul_version = TYPE_LONGHAUL_V2; - if (c->x86_mask < 8) { + if (c->x86_stepping < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; } else { @@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) numscales = 32; memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); - switch (c->x86_mask) { + switch (c->x86_stepping) { case 0 ... 1: cpu_model = CPU_NEHEMIAH; cpuname = "C3 'Nehemiah A' [C5XLOE]"; diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index fd77812313f3..a25741b1281b 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) #endif /* Errata workaround */ - cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; + cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping; switch (cpuid) { case 0x0f07: case 0x0f0a: diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 9f013ed42977..ef276f6a8c46 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -131,7 +131,7 @@ static int check_powernow(void) return 0; } - if ((c->x86_model == 6) && (c->x86_mask == 0)) { + if ((c->x86_model == 6) && (c->x86_stepping == 0)) { pr_info("K7 660[A0] core detected, enabling errata workarounds\n"); have_a0 = 1; } diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index 41bc5397f4bb..4fa5adf16c70 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -37,7 +37,7 @@ struct cpu_id { __u8 x86; /* CPU family */ __u8 x86_model; /* model */ - __u8 x86_mask; /* stepping */ + __u8 x86_stepping; /* stepping */ }; enum { @@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, { if ((c->x86 == x->x86) && (c->x86_model == x->x86_model) && - (c->x86_mask == x->x86_mask)) + (c->x86_stepping == x->x86_stepping)) return 1; return 0; } diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 1b8062182c81..ade98a219cc1 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -272,9 +272,9 @@ unsigned int speedstep_detect_processor(void) ebx = cpuid_ebx(0x00000001); ebx &= 0x000000FF; - pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping); - switch (c->x86_mask) { + switch (c->x86_stepping) { case 4: /* * B-stepping [M-P4-M] @@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void) msr_lo, msr_hi); if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { - if (c->x86_mask == 0x01) { + if (c->x86_stepping == 0x01) { pr_debug("early PIII version\n"); return SPEEDSTEP_CPU_PIII_C_EARLY; } else diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 441e86b23571..9126627cbf4d 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -531,7 +531,7 @@ static int __init padlock_init(void) printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); - if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { + if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) { ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6e197c1c213d..1c5f23224b3c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2719,7 +2719,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) struct amd64_family_type *fam_type = NULL; pvt->ext_model = boot_cpu_data.x86_model >> 4; - pvt->stepping = boot_cpu_data.x86_mask; + pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 7db692ed3dea..ac0c6c83b6d6 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -948,7 +948,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", m->extcpu, - c->x86, c->x86_model, c->x86_mask, + c->x86, c->x86_model, c->x86_stepping, m->bank, ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), ((m->status & MCI_STATUS_UC) ? "UE" : diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 6a27eb2fed17..be1e380fa1c3 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -269,13 +269,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { const struct tjmax_model *tm = &tjmax_model_table[i]; if (c->x86_model == tm->model && - (tm->mask == ANY || c->x86_mask == tm->mask)) + (tm->mask == ANY || c->x86_stepping == tm->mask)) return tm->tjmax; } /* Early chips have no MSR for TjMax */ - if (c->x86_model == 0xf && c->x86_mask < 4) + if (c->x86_model == 0xf && c->x86_stepping < 4) usemsr_ee = 0; if (c->x86_model > 0xe && usemsr_ee) { @@ -426,7 +426,7 @@ static int chk_ucode_version(unsigned int cpu) * Readings might stop update when processor visited too deep sleep, * fixed for stepping D0 (6EC). */ - if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) { + if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); return -ENODEV; } diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index ef91b8a67549..84e91286fc4f 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -293,7 +293,7 @@ u8 vid_which_vrm(void) if (c->x86 < 6) /* Any CPU with family lower than 6 */ return 0; /* doesn't have VID */ - vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor); + vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor); if (vrm_ret == 134) vrm_ret = get_via_model_d_vrm(); if (vrm_ret == 0) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 9cdfde6515ad..0124584a6a6d 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -179,7 +179,7 @@ static bool has_erratum_319(struct pci_dev *pdev) * and AM3 formats, but that's the best we can do. */ return boot_cpu_data.x86_model < 4 || - (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2); + (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2); } static int k10temp_probe(struct pci_dev *pdev, diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index 734d55d48cc8..486502798fc5 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev, return -ENOMEM; model = boot_cpu_data.x86_model; - stepping = boot_cpu_data.x86_mask; + stepping = boot_cpu_data.x86_stepping; /* feature available since SH-C0, exclude older revisions */ if ((model == 4 && stepping == 0) || diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c index 6082f653c68a..67773e8bbb95 100644 --- a/drivers/video/fbdev/geode/video_gx.c +++ b/drivers/video/fbdev/geode/video_gx.c @@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info) int timeout = 1000; /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ - if (cpu_data(0).x86_mask == 1) { + if (cpu_data(0).x86_stepping == 1) { pll_table = gx_pll_table_14MHz; pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); } else { From 14eb41363919106e50092ed85acb682f30b898bd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Feb 2018 10:14:17 +0300 Subject: [PATCH 548/705] x86/spectre: Fix an error message commit 9de29eac8d2189424d81c0d840cd0469aa3d41c8 upstream. If i == ARRAY_SIZE(mitigation_options) then we accidentally print garbage from one space beyond the end of the mitigation_options[] array. Signed-off-by: Dan Carpenter Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: KarimAllah Ahmed Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Fixes: 9005c6834c0f ("x86/spectre: Simplify spectre_v2 command line parsing") Link: http://lkml.kernel.org/r/20180214071416.GA26677@mwanda Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b83e0c9107bc..baddc9ed3454 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -173,7 +173,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", arg); return SPECTRE_V2_CMD_AUTO; } } From aa72eecb4b0d11c3e198707440c5a97a56e7050f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Feb 2018 13:22:08 -0600 Subject: [PATCH 549/705] x86/cpu: Change type of x86_cache_size variable to unsigned int commit 24dbc6000f4b9b0ef5a9daecb161f1907733765a upstream. Currently, x86_cache_size is of type int, which makes no sense as we will never have a valid cache size equal or less than 0. So instead of initializing this variable to -1, it can perfectly be initialized to 0 and use it as an unsigned variable instead. Suggested-by: Thomas Gleixner Signed-off-by: Gustavo A. R. Silva Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Addresses-Coverity-ID: 1464429 Link: http://lkml.kernel.org/r/20180213192208.GA26414@embeddedor.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/proc.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 360a1134a748..ec15ca2b32d0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -113,7 +113,7 @@ struct cpuinfo_x86 { char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ - int x86_cache_size; + unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values: */ int x86_cache_max_rmid; /* max index */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96b2c83e42c6..301bbd1f2373 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1144,7 +1144,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) int i; c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; + c->x86_cache_size = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 19af84ed1819..4bcd30c87531 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1132,7 +1132,7 @@ static struct microcode_ops microcode_intel_ops = { static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) { - u64 llc_size = c->x86_cache_size * 1024; + u64 llc_size = c->x86_cache_size * 1024ULL; do_div(llc_size, c->x86_max_cores); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 9e817f2a5079..c4f772d3f35c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -87,8 +87,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + if (c->x86_cache_size) + seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); show_cpuinfo_core(m, c, cpu); show_cpuinfo_misc(m, c); From 95a440bc9f4dd1a865dfa00c49b664b951a492a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Feb 2018 16:16:57 +0100 Subject: [PATCH 550/705] x86: fix build warnign with 32-bit PAE I ran into a 4.9 build warning in randconfig testing, starting with the KAISER patches: arch/x86/kernel/ldt.c: In function 'alloc_ldt_struct': arch/x86/include/asm/pgtable_types.h:208:24: error: large integer implicitly truncated to unsigned type [-Werror=overflow] #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) ^ arch/x86/kernel/ldt.c:81:6: note: in expansion of macro '__PAGE_KERNEL' __PAGE_KERNEL); ^~~~~~~~~~~~~ I originally ran into this last year when the patches were part of linux-next, and tried to work around it by using the proper 'pteval_t' types consistently, but that caused additional problems. This takes a much simpler approach, and makes the argument type of the dummy helper always 64-bit, which is wide enough for any page table layout and won't hurt since this call is just an empty stub anyway. Fixes: 8f0baadf2bea ("kaiser: merged update") Signed-off-by: Arnd Bergmann Acked-by: Kees Cook Acked-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- include/linux/kaiser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h index 58c55b1589d0..b56c19010480 100644 --- a/include/linux/kaiser.h +++ b/include/linux/kaiser.h @@ -32,7 +32,7 @@ static inline void kaiser_init(void) { } static inline int kaiser_add_mapping(unsigned long addr, - unsigned long size, unsigned long flags) + unsigned long size, u64 flags) { return 0; } From 012e79b98f1a5eead31ea465518af35700a64c10 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2017 17:10:08 -0700 Subject: [PATCH 551/705] vfs: don't do RCU lookup of empty pathnames commit c0eb027e5aef70b71e5a38ee3e264dc0b497f343 upstream. Normal pathname lookup doesn't allow empty pathnames, but using AT_EMPTY_PATH (with name_to_handle_at() or fstatat(), for example) you can trigger an empty pathname lookup. And not only is the RCU lookup in that case entirely unnecessary (because we'll obviously immediately finalize the end result), it is actively wrong. Why? An empth path is a special case that will return the original 'dirfd' dentry - and that dentry may not actually be RCU-free'd, resulting in a potential use-after-free if we were to initialize the path lazily under the RCU read lock and depend on complete_walk() finalizing the dentry. Found by syzkaller and KASAN. Reported-by: Dmitry Vyukov Reported-by: Vegard Nossum Acked-by: Al Viro Signed-off-by: Linus Torvalds Cc: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index e7d125c23aa6..6cfb45f262aa 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2138,6 +2138,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags) int retval = 0; const char *s = nd->name->name; + if (!*s) + flags &= ~LOOKUP_RCU; + nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; nd->depth = 0; From c7bbb6d4c41a8fe36b671010defd89d96ae59637 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Dec 2017 22:30:07 +0100 Subject: [PATCH 552/705] ARM: dts: exynos: fix RTC interrupt for exynos5410 commit 5628a8ca14149ba4226e3bdce3a04c3b688435ad upstream. According to the comment added to exynos_dt_pmu_match[] in commit 8b283c025443 ("ARM: exynos4/5: convert pmu wakeup to stacked domains"), the RTC is not able to wake up the system through the PMU on Exynos5410, unlike Exynos5420. However, when the RTC DT node got added, it was a straight copy of the Exynos5420 node, which now causes a warning from dtc. This removes the incorrect interrupt-parent, which should get the interrupt working and avoid the warning. Fixes: e1e146b1b062 ("ARM: dts: exynos: Add RTC and I2C to Exynos5410") Signed-off-by: Arnd Bergmann Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5410.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos5410.dtsi b/arch/arm/boot/dts/exynos5410.dtsi index 137f48464f8b..bb59fee072c0 100644 --- a/arch/arm/boot/dts/exynos5410.dtsi +++ b/arch/arm/boot/dts/exynos5410.dtsi @@ -274,7 +274,6 @@ &rtc { clocks = <&clock CLK_RTC>; clock-names = "rtc"; - interrupt-parent = <&pmu_system_controller>; status = "disabled"; }; From 53e0f2656e81913d9d78b20f00016e109ead44fd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 15:40:37 +0100 Subject: [PATCH 553/705] ARM: pxa/tosa-bt: add MODULE_LICENSE tag commit 3343647813fdf0f2409fbf5816ee3e0622168079 upstream. Without this tag, we get a build warning: WARNING: modpost: missing MODULE_LICENSE() in arch/arm/mach-pxa/tosa-bt.o For completeness, I'm also adding author and description fields. Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/tosa-bt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c index 107f37210fb9..83606087edc7 100644 --- a/arch/arm/mach-pxa/tosa-bt.c +++ b/arch/arm/mach-pxa/tosa-bt.c @@ -132,3 +132,7 @@ static struct platform_driver tosa_bt_driver = { }, }; module_platform_driver(tosa_bt_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dmitry Baryshkov"); +MODULE_DESCRIPTION("Bluetooth built-in chip control"); From 37ed2c8e496727675e71eaf9f5cc44a43cf4596a Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 7 Nov 2017 19:45:01 -0800 Subject: [PATCH 554/705] arm64: dts: msm8916: Add missing #phy-cells commit b0ab681285aa66064f2de5b74191c0cabba381ff upstream. Add a missing #phy-cells to the dsi-phy, to silence dtc warning. Cc: Archit Taneja Fixes: 305410ffd1b2 ("arm64: dts: msm8916: Add display support") Signed-off-by: Bjorn Andersson Reviewed-by: Archit Taneja Signed-off-by: Andy Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index ebf62ad76998..08b88f6791be 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -796,6 +796,7 @@ "dsi_phy_regulator"; #clock-cells = <1>; + #phy-cells = <0>; clocks = <&gcc GCC_MDSS_AHB_CLK>; clock-names = "iface_clk"; From 677adefd395e12e957e58c0cbe8017fffde18ede Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:10:11 +0100 Subject: [PATCH 555/705] ARM: dts: s5pv210: add interrupt-parent for ohci commit 5c1037196b9ee75897c211972de370ed1336ec8f upstream. The ohci-hcd node has an interrupt number but no interrupt-parent, leading to a warning with current dtc versions: arch/arm/boot/dts/s5pv210-aquila.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-goni.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-smdkc110.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-smdkv210.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-torbreck.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 As seen from the related exynos dts files, the ohci and ehci controllers always share one interrupt number, and the number is the same here as well, so setting the same interrupt-parent is the reasonable solution here. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/s5pv210.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index a853918be43f..0c10ba517cd0 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -463,6 +463,7 @@ compatible = "samsung,exynos4210-ohci"; reg = <0xec300000 0x100>; interrupts = <23>; + interrupt-parent = <&vic1>; clocks = <&clocks CLK_USB_HOST>; clock-names = "usbhost"; #address-cells = <1>; From cc98b53d798024d10aa863f4a42bb6ff8bea6615 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 1 Dec 2017 13:07:08 +0100 Subject: [PATCH 556/705] arm: dts: mt2701: Add reset-cells commit ae72e95b5e4ded145bfc6926ad9457b74e3af41a upstream. The hifsys and ethsys needs the definition of the reset-cells property. Fix this. Reviewed-by: Rob Herring Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt2701.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi index 77c6b931dc24..23fe0497f708 100644 --- a/arch/arm/boot/dts/mt2701.dtsi +++ b/arch/arm/boot/dts/mt2701.dtsi @@ -197,12 +197,14 @@ compatible = "mediatek,mt2701-hifsys", "syscon"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; ethsys: syscon@1b000000 { compatible = "mediatek,mt2701-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; bdpsys: syscon@1c000000 { From 30c68fb6582728657000627af186a99280c50721 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 2 Jan 2018 08:57:17 +0100 Subject: [PATCH 557/705] ARM: dts: Delete bogus reference to the charlcd commit 586b2a4befad88cd87b372a1cea01e58c6811ea9 upstream. The EB MP board probably has a character LCD but the board manual does not really state which IRQ it has assigned to this device. The invalid assignment was a mistake by me during submission of the DTSI where I was looking for the reference, didn't find it and didn't fill it in. Delete this for now: it can probably be fixed but that requires access to the actual board for some trial-and-error experiments. Reported-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/arm-realview-eb-mp.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/arm-realview-eb-mp.dtsi b/arch/arm/boot/dts/arm-realview-eb-mp.dtsi index 7b8d90b7aeea..29b636fce23f 100644 --- a/arch/arm/boot/dts/arm-realview-eb-mp.dtsi +++ b/arch/arm/boot/dts/arm-realview-eb-mp.dtsi @@ -150,11 +150,6 @@ interrupts = <0 8 IRQ_TYPE_LEVEL_HIGH>; }; -&charlcd { - interrupt-parent = <&intc>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; -}; - &serial0 { interrupt-parent = <&intc>; interrupts = <0 4 IRQ_TYPE_LEVEL_HIGH>; From 08e4d04569874f79963a40890e790d2b4c33ed30 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Nov 2017 06:08:05 -0500 Subject: [PATCH 558/705] media: r820t: fix r820t_write_reg for KASAN commit 16c3ada89cff9a8c2a0eea34ffa1aa20af3f6008 upstream. With CONFIG_KASAN, we get an overly long stack frame due to inlining the register access functions: drivers/media/tuners/r820t.c: In function 'generic_set_freq.isra.7': drivers/media/tuners/r820t.c:1334:1: error: the frame size of 2880 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] This is caused by a gcc bug that has now been fixed in gcc-8. To work around the problem, we can pass the register data through a local variable that older gcc versions can optimize out as well. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/tuners/r820t.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/media/tuners/r820t.c b/drivers/media/tuners/r820t.c index 08dca40356d2..006dac6e8940 100644 --- a/drivers/media/tuners/r820t.c +++ b/drivers/media/tuners/r820t.c @@ -396,9 +396,11 @@ static int r820t_write(struct r820t_priv *priv, u8 reg, const u8 *val, return 0; } -static int r820t_write_reg(struct r820t_priv *priv, u8 reg, u8 val) +static inline int r820t_write_reg(struct r820t_priv *priv, u8 reg, u8 val) { - return r820t_write(priv, reg, &val, 1); + u8 tmp = val; /* work around GCC PR81715 with asan-stack=1 */ + + return r820t_write(priv, reg, &tmp, 1); } static int r820t_read_cache_reg(struct r820t_priv *priv, int reg) @@ -411,17 +413,18 @@ static int r820t_read_cache_reg(struct r820t_priv *priv, int reg) return -EINVAL; } -static int r820t_write_reg_mask(struct r820t_priv *priv, u8 reg, u8 val, +static inline int r820t_write_reg_mask(struct r820t_priv *priv, u8 reg, u8 val, u8 bit_mask) { + u8 tmp = val; int rc = r820t_read_cache_reg(priv, reg); if (rc < 0) return rc; - val = (rc & ~bit_mask) | (val & bit_mask); + tmp = (rc & ~bit_mask) | (tmp & bit_mask); - return r820t_write(priv, reg, &val, 1); + return r820t_write(priv, reg, &tmp, 1); } static int r820t_read(struct r820t_priv *priv, u8 reg, u8 *val, int len) From 80c1c8322c331586a86e58d3f95026a1265ab396 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 Feb 2018 15:43:56 +0100 Subject: [PATCH 559/705] Linux 4.9.83 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d338530540e0..cfae9b823d2b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 82 +SUBLEVEL = 83 EXTRAVERSION = NAME = Roaring Lionus From bd3ccdc6f922c6b7db4b7075d1b6596ddb986a98 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 23 Jan 2018 17:27:25 +0800 Subject: [PATCH 560/705] vhost: use mutex_lock_nested() in vhost_dev_lock_vqs() commit e9cb4239134c860e5f92c75bf5321bd377bb505b upstream. We used to call mutex_lock() in vhost_dev_lock_vqs() which tries to hold mutexes of all virtqueues. This may confuse lockdep to report a possible deadlock because of trying to hold locks belong to same class. Switch to use mutex_lock_nested() to avoid false positive. Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API") Reported-by: syzbot+dbb7c1161485e61b0241@syzkaller.appspotmail.com Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 64613fbf5cf8..cd38f5add254 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -849,7 +849,7 @@ static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) - mutex_lock(&d->vqs[i]->mutex); + mutex_lock_nested(&d->vqs[i]->mutex, i); } static void vhost_dev_unlock_vqs(struct vhost_dev *d) From 085cbbda4b4cc7dd2ba63806346881c2c2e10107 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 24 Jan 2018 12:35:41 -0800 Subject: [PATCH 561/705] kcm: Check if sk_user_data already set in kcm_attach commit e5571240236c5652f3e079b1d5866716a7ad819c upstream. This is needed to prevent sk_user_data being overwritten. The check is done under the callback lock. This should prevent a socket from being attached twice to a KCM mux. It also prevents a socket from being attached for other use cases of sk_user_data as long as the other cases set sk_user_data under the lock. Followup work is needed to unify all the use cases of sk_user_data to use the same locking. Reported-by: syzbot+114b15f2be420a8886c3@syzkaller.appspotmail.com Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Tom Herbert Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 22785dc03051..69bef8ee04b4 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1404,9 +1404,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, return err; } - sock_hold(csk); - write_lock_bh(&csk->sk_callback_lock); + + /* Check if sk_user_data is aready by KCM or someone else. + * Must be done under lock to prevent race conditions. + */ + if (csk->sk_user_data) { + write_unlock_bh(&csk->sk_callback_lock); + strp_done(&psock->strp); + kmem_cache_free(kcm_psockp, psock); + return -EALREADY; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; @@ -1414,8 +1423,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, csk->sk_data_ready = psock_data_ready; csk->sk_write_space = psock_write_space; csk->sk_state_change = psock_state_change; + write_unlock_bh(&csk->sk_callback_lock); + sock_hold(csk); + /* Finished initialization, now add the psock to the MUX. */ spin_lock_bh(&mux->lock); head = &mux->psocks; From 2bb174afca6cdb90499bcd2c46e5d966d9976834 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 24 Jan 2018 12:35:40 -0800 Subject: [PATCH 562/705] kcm: Only allow TCP sockets to be attached to a KCM mux commit 581e7226a5d43f629eb6399a121f85f6a15f81be upstream. TCP sockets for IPv4 and IPv6 that are not listeners or in closed stated are allowed to be attached to a KCM mux. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+8865eaff7f9acd593945@syzkaller.appspotmail.com Signed-off-by: Tom Herbert Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 69bef8ee04b4..179cd9b1b1f4 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1381,8 +1381,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; - /* We must prevent loops or risk deadlock ! */ - if (csk->sk_family == PF_KCM) + /* Only allow TCP sockets to be attached for now */ + if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || + csk->sk_protocol != IPPROTO_TCP) + return -EOPNOTSUPP; + + /* Don't allow listeners or closed sockets */ + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) return -EOPNOTSUPP; psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); From 758980347e174abf068ea9c8f20b6a891cffa683 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 09:58:27 +0100 Subject: [PATCH 563/705] cfg80211: check dev_set_name() return value commit 59b179b48ce2a6076448a44531242ac2b3f6cef2 upstream. syzbot reported a warning from rfkill_alloc(), and after a while I think that the reason is that it was doing fault injection and the dev_set_name() failed, leaving the name NULL, and we didn't check the return value and got to rfkill_alloc() with a NULL name. Since we really don't want a NULL name, we ought to check the return value. Fixes: fb28ad35906a ("net: struct device - replace bus_id with dev_name(), dev_set_name()") Reported-by: syzbot+1ddfb3357e1d7bb5b5d3@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 8201e6d7449e..ce16da2905dc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -421,6 +421,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, if (rv) goto use_default_name; } else { + int rv; + use_default_name: /* NOTE: This is *probably* safe w/out holding rtnl because of * the restrictions on phy names. Probably this call could @@ -428,7 +430,11 @@ use_default_name: * phyX. But, might should add some locking and check return * value, and use a different name if this one exists? */ - dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); + rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); + if (rv < 0) { + kfree(rdev); + return NULL; + } } INIT_LIST_HEAD(&rdev->wiphy.wdev_list); From 5d89917c5a0fbe2f5fe04bb15c6bcd0b1ebf4d24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Dec 2017 23:25:45 +0100 Subject: [PATCH 564/705] xfrm: skip policies marked as dead while rehashing commit 862591bf4f519d1b8d859af720fafeaebdd0162a upstream. syzkaller triggered following KASAN splat: BUG: KASAN: slab-out-of-bounds in xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618 read of size 2 at addr ffff8801c8e92fe4 by task kworker/1:1/23 [..] Workqueue: events xfrm_hash_rebuild [..] __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:428 xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618 process_one_work+0xbbf/0x1b10 kernel/workqueue.c:2112 worker_thread+0x223/0x1990 kernel/workqueue.c:2246 [..] The reproducer triggers: 1016 if (error) { 1017 list_move_tail(&walk->walk.all, &x->all); 1018 goto out; 1019 } in xfrm_policy_walk() via pfkey (it sets tiny rcv space, dump callback returns -ENOBUFS). In this case, *walk is located the pfkey socket struct, so this socket becomes visible in the global policy list. It looks like this is intentional -- phony walker has walk.dead set to 1 and all other places skip such "policies". Ccing original authors of the two commits that seem to expose this issue (first patch missed ->dead check, second patch adds pfkey sockets to policies dumper list). Fixes: 880a6fab8f6ba5b ("xfrm: configure policy hash table thresholds by netlink") Fixes: 12a169e7d8f4b1c ("ipsec: Put dumpers on the dump list") Cc: Herbert Xu Cc: Timo Teras Cc: Christophe Gouault Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f19e6a57e118..0f181c1f559b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -643,7 +643,8 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { - if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { + if (policy->walk.dead || + xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { /* skip socket policies */ continue; } From 274ee93f0b12ce1865579ea2bbb783642c72c31f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 18 Dec 2017 20:31:41 +0900 Subject: [PATCH 565/705] mm,vmscan: Make unregister_shrinker() no-op if register_shrinker() failed. commit bb422a738f6566f7439cd347d54e321e4fe92a9f upstream. Syzbot caught an oops at unregister_shrinker() because combination of commit 1d3d4437eae1bb29 ("vmscan: per-node deferred work") and fault injection made register_shrinker() fail and the caller of register_shrinker() did not check for failure. ---------- [ 554.881422] FAULT_INJECTION: forcing a failure. [ 554.881422] name failslab, interval 1, probability 0, space 0, times 0 [ 554.881438] CPU: 1 PID: 13231 Comm: syz-executor1 Not tainted 4.14.0-rc8+ #82 [ 554.881443] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 554.881445] Call Trace: [ 554.881459] dump_stack+0x194/0x257 [ 554.881474] ? arch_local_irq_restore+0x53/0x53 [ 554.881486] ? find_held_lock+0x35/0x1d0 [ 554.881507] should_fail+0x8c0/0xa40 [ 554.881522] ? fault_create_debugfs_attr+0x1f0/0x1f0 [ 554.881537] ? check_noncircular+0x20/0x20 [ 554.881546] ? find_next_zero_bit+0x2c/0x40 [ 554.881560] ? ida_get_new_above+0x421/0x9d0 [ 554.881577] ? find_held_lock+0x35/0x1d0 [ 554.881594] ? __lock_is_held+0xb6/0x140 [ 554.881628] ? check_same_owner+0x320/0x320 [ 554.881634] ? lock_downgrade+0x990/0x990 [ 554.881649] ? find_held_lock+0x35/0x1d0 [ 554.881672] should_failslab+0xec/0x120 [ 554.881684] __kmalloc+0x63/0x760 [ 554.881692] ? lock_downgrade+0x990/0x990 [ 554.881712] ? register_shrinker+0x10e/0x2d0 [ 554.881721] ? trace_event_raw_event_module_request+0x320/0x320 [ 554.881737] register_shrinker+0x10e/0x2d0 [ 554.881747] ? prepare_kswapd_sleep+0x1f0/0x1f0 [ 554.881755] ? _down_write_nest_lock+0x120/0x120 [ 554.881765] ? memcpy+0x45/0x50 [ 554.881785] sget_userns+0xbcd/0xe20 (...snipped...) [ 554.898693] kasan: CONFIG_KASAN_INLINE enabled [ 554.898724] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 554.898732] general protection fault: 0000 [#1] SMP KASAN [ 554.898737] Dumping ftrace buffer: [ 554.898741] (ftrace buffer empty) [ 554.898743] Modules linked in: [ 554.898752] CPU: 1 PID: 13231 Comm: syz-executor1 Not tainted 4.14.0-rc8+ #82 [ 554.898755] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 554.898760] task: ffff8801d1dbe5c0 task.stack: ffff8801c9e38000 [ 554.898772] RIP: 0010:__list_del_entry_valid+0x7e/0x150 [ 554.898775] RSP: 0018:ffff8801c9e3f108 EFLAGS: 00010246 [ 554.898780] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 554.898784] RDX: 0000000000000000 RSI: ffff8801c53c6f98 RDI: ffff8801c53c6fa0 [ 554.898788] RBP: ffff8801c9e3f120 R08: 1ffff100393c7d55 R09: 0000000000000004 [ 554.898791] R10: ffff8801c9e3ef70 R11: 0000000000000000 R12: 0000000000000000 [ 554.898795] R13: dffffc0000000000 R14: 1ffff100393c7e45 R15: ffff8801c53c6f98 [ 554.898800] FS: 0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 [ 554.898804] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 554.898807] CR2: 00000000dbc23000 CR3: 00000001c7269000 CR4: 00000000001406e0 [ 554.898813] DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 [ 554.898816] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 [ 554.898818] Call Trace: [ 554.898828] unregister_shrinker+0x79/0x300 [ 554.898837] ? perf_trace_mm_vmscan_writepage+0x750/0x750 [ 554.898844] ? down_write+0x87/0x120 [ 554.898851] ? deactivate_super+0x139/0x1b0 [ 554.898857] ? down_read+0x150/0x150 [ 554.898864] ? check_same_owner+0x320/0x320 [ 554.898875] deactivate_locked_super+0x64/0xd0 [ 554.898883] deactivate_super+0x141/0x1b0 ---------- Since allowing register_shrinker() callers to call unregister_shrinker() when register_shrinker() failed can simplify error recovery path, this patch makes unregister_shrinker() no-op when register_shrinker() failed. Also, reset shrinker->nr_deferred in case unregister_shrinker() was by error called twice. Signed-off-by: Tetsuo Handa Signed-off-by: Aliaksei Karaliou Reported-by: syzbot Cc: Glauber Costa Cc: Al Viro Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index f118dc23f662..4365de74bdb0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -295,10 +295,13 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + if (!shrinker->nr_deferred) + return; down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); From 46b317167d01130c5cc98ad042b2955c58f14597 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Nov 2017 06:53:55 +0100 Subject: [PATCH 566/705] xfrm: Fix stack-out-of-bounds read on socket policy lookup. commit ddc47e4404b58f03e98345398fb12d38fe291512 upstream. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by catching address family missmatches of the policy and the flow already before we do the lookup. Reported-by: syzbot Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0f181c1f559b..5e89b7461f99 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1257,9 +1257,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, family); + bool match; int err = 0; + if (pol->family != family) { + pol = NULL; + goto out; + } + + match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { pol = NULL; From 85552886b454b27588d4d7a01456ab5e82daaa47 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 27 Nov 2017 11:15:16 -0800 Subject: [PATCH 567/705] xfrm: check id proto in validate_tmpl() commit 6a53b7593233ab9e4f96873ebacc0f653a55c3e1 upstream. syzbot reported a kernel warning in xfrm_state_fini(), which indicates that we have entries left in the list net->xfrm.state_all whose proto is zero. And xfrm_id_proto_match() doesn't consider them as a match with IPSEC_PROTO_ANY in this case. Proto with value 0 is probably not a valid value, at least verify_newsa_info() doesn't consider it valid either. This patch fixes it by checking the proto value in validate_tmpl() and rejecting invalid ones, like what iproute2 does in xfrm_xfrmproto_getbyname(). Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 22934885bd3f..4b52314c46d5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1406,6 +1406,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) default: return -EINVAL; } + + switch (ut[i].id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + case IPSEC_PROTO_ANY: + break; + default: + return -EINVAL; + } + } return 0; From 2e6712234606ce4b155723f71a3c9f52128d9266 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Nov 2017 14:11:11 +0800 Subject: [PATCH 568/705] sctp: set frag_point in sctp_setsockopt_maxseg correctly commit ecca8f88da5c4260cc2bccfefd2a24976704c366 upstream. Now in sctp_setsockopt_maxseg user_frag or frag_point can be set with val >= 8 and val <= SCTP_MAX_CHUNK_LEN. But both checks are incorrect. val >= 8 means frag_point can even be less than SCTP_DEFAULT_MINSEGMENT. Then in sctp_datamsg_from_user(), when it's value is greater than cookie echo len and trying to bundle with cookie echo chunk, the first_len will overflow. The worse case is when it's value is equal as cookie echo len, first_len becomes 0, it will go into a dead loop for fragment later on. In Hangbin syzkaller testing env, oom was even triggered due to consecutive memory allocation in that loop. Besides, SCTP_MAX_CHUNK_LEN is the max size of the whole chunk, it should deduct the data header for frag_point or user_frag check. This patch does a proper check with SCTP_DEFAULT_MINSEGMENT subtracting the sctphdr and datahdr, SCTP_MAX_CHUNK_LEN subtracting datahdr when setting frag_point via sockopt. It also improves sctp_setsockopt_maxseg codes. Suggested-by: Marcelo Ricardo Leitner Reported-by: Hangbin Liu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/sctp.h | 3 ++- net/sctp/socket.c | 29 +++++++++++++++++++---------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 61d9ce89d10d..579ded2c6ef1 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -433,7 +433,8 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - + sizeof(struct sctp_data_chunk))); return frag; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c472b8391dde..fd5b9d573b38 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3125,9 +3125,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign */ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; struct sctp_association *asoc; - struct sctp_sock *sp = sctp_sk(sk); int val; if (optlen == sizeof(int)) { @@ -3143,26 +3143,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; val = params.assoc_value; - } else + } else { return -EINVAL; + } - if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) - return -EINVAL; + if (val) { + int min_len, max_len; + + min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len; + min_len -= sizeof(struct sctphdr) + + sizeof(struct sctp_data_chunk); + + max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk); + + if (val < min_len || val > max_len) + return -EINVAL; + } asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; - if (asoc) { if (val == 0) { - val = asoc->pathmtu; - val -= sp->pf->af->net_header_len; + val = asoc->pathmtu - sp->pf->af->net_header_len; val -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); + sizeof(struct sctp_data_chunk); } asoc->user_frag = val; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); } else { + if (params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; sp->user_frag = val; } From 7569adcf396a2b17fcc45d4172c0857e60ed32df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 5 Nov 2017 09:16:09 -0700 Subject: [PATCH 569/705] blktrace: fix unlocked registration of tracepoints commit a6da0024ffc19e0d47712bb5ca4fd083f76b07df upstream. We need to ensure that tracepoints are registered and unregistered with the users of them. The existing atomic count isn't enough for that. Add a lock around the tracepoints, so we serialize access to them. This fixes cases where we have multiple users setting up and tearing down tracepoints, like this: CPU: 0 PID: 2995 Comm: syzkaller857118 Not tainted 4.14.0-rc5-next-20171018+ #36 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1c4/0x1e0 kernel/panic.c:546 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:177 do_trap_no_signal arch/x86/kernel/traps.c:211 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:260 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:297 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:310 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 RIP: 0010:tracepoint_add_func kernel/tracepoint.c:210 [inline] RIP: 0010:tracepoint_probe_register_prio+0x397/0x9a0 kernel/tracepoint.c:283 RSP: 0018:ffff8801d1d1f6c0 EFLAGS: 00010293 RAX: ffff8801d22e8540 RBX: 00000000ffffffef RCX: ffffffff81710f07 RDX: 0000000000000000 RSI: ffffffff85b679c0 RDI: ffff8801d5f19818 RBP: ffff8801d1d1f7c8 R08: ffffffff81710c10 R09: 0000000000000004 R10: ffff8801d1d1f6b0 R11: 0000000000000003 R12: ffffffff817597f0 R13: 0000000000000000 R14: 00000000ffffffff R15: ffff8801d1d1f7a0 tracepoint_probe_register+0x2a/0x40 kernel/tracepoint.c:304 register_trace_block_rq_insert include/trace/events/block.h:191 [inline] blk_register_tracepoints+0x1e/0x2f0 kernel/trace/blktrace.c:1043 do_blk_trace_setup+0xa10/0xcf0 kernel/trace/blktrace.c:542 blk_trace_setup+0xbd/0x180 kernel/trace/blktrace.c:564 sg_ioctl+0xc71/0x2d90 drivers/scsi/sg.c:1089 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x444339 RSP: 002b:00007ffe05bb5b18 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000006d66c0 RCX: 0000000000444339 RDX: 000000002084cf90 RSI: 00000000c0481273 RDI: 0000000000000009 RBP: 0000000000000082 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: ffffffffffffffff R13: 00000000c0481273 R14: 0000000000000000 R15: 0000000000000000 since we can now run these in parallel. Ensure that the exported helpers for doing this are grabbing the queue trace mutex. Reported-by: Steven Rostedt Tested-by: Dmitry Vyukov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- kernel/trace/blktrace.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index dbafc5df03f3..4e17d55ba127 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -57,7 +57,8 @@ static struct tracer_flags blk_tracer_flags = { }; /* Global reference count of probes */ -static atomic_t blk_probes_ref = ATOMIC_INIT(0); +static DEFINE_MUTEX(blk_probe_mutex); +static int blk_probes_ref; static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); @@ -306,11 +307,26 @@ static void blk_trace_free(struct blk_trace *bt) kfree(bt); } +static void get_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (++blk_probes_ref == 1) + blk_register_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + +static void put_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (!--blk_probes_ref) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + static void blk_trace_cleanup(struct blk_trace *bt) { blk_trace_free(bt); - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); + put_probe_ref(); } int blk_trace_remove(struct request_queue *q) @@ -522,8 +538,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (cmpxchg(&q->blk_trace, NULL, bt)) goto err; - if (atomic_inc_return(&blk_probes_ref) == 1) - blk_register_tracepoints(); + get_probe_ref(); return 0; err: @@ -1469,9 +1484,7 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); - + put_probe_ref(); blk_trace_free(bt); return 0; } @@ -1502,8 +1515,7 @@ static int blk_trace_setup_queue(struct request_queue *q, if (cmpxchg(&q->blk_trace, NULL, bt)) goto free_bt; - if (atomic_inc_return(&blk_probes_ref) == 1) - blk_register_tracepoints(); + get_probe_ref(); return 0; free_bt: From eeb1f9bd2480eab16f77aeafacec08c82f71a972 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Oct 2017 11:55:35 +0000 Subject: [PATCH 570/705] drm: Require __GFP_NOFAIL for the legacy drm_modeset_lock_all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d18d1a5ac811d12f7ebc1129230312b5f2c50cb8 upstream. To acquire all modeset locks requires a ww_ctx to be allocated. As this is the legacy path and the allocation small, to reduce the changes required (and complex untested error handling) to the legacy drivers, we simply assume that the allocation succeeds. At present, it relies on the too-small-to-fail rule, but syzbot found that by injecting a failure here we would hit the WARN. Document that this allocation must succeed with __GFP_NOFAIL. Signed-off-by: Chris Wilson Cc: Daniel Vetter Reported-by: syzbot Signed-off-by: Daniel Vetter Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031115535.15166-1-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_modeset_lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 61146f5b4f56..0a6bf815640e 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -81,7 +81,7 @@ void drm_modeset_lock_all(struct drm_device *dev) struct drm_modeset_acquire_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); if (WARN_ON(!ctx)) return; From 5fd4db305f2750418151d2d5553bfaec2956167a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Feb 2018 17:45:49 +0800 Subject: [PATCH 571/705] ptr_ring: fail early if queue occupies more than KMALLOC_MAX_SIZE commit 6e6e41c3112276288ccaf80c70916779b84bb276 upstream. To avoid slab to warn about exceeded size, fail early if queue occupies more than KMALLOC_MAX_SIZE. Reported-by: syzbot+e4d4f9ddd4295539735d@syzkaller.appspotmail.com Fixes: 2e0ab8ca83c12 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index e38f471a5402..05c6d20c2a7a 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -351,6 +351,8 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { + if (size * sizeof(void *) > KMALLOC_MAX_SIZE) + return NULL; return kcalloc(size, sizeof(void *), gfp); } From 5cab144f072bdae16f37a06efb0dad210c7ff7bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:02 +0100 Subject: [PATCH 572/705] Provide a function to create a NUL-terminated string from unterminated data commit f35157417215ec138c920320c746fdb3e04ef1d5 upstream. Provide a function, kmemdup_nul(), that will create a NUL-terminated string from an unterminated character array where the length is known in advance. This is better than kstrndup() in situations where we already know the string length as the strnlen() in kstrndup() is superfluous. Signed-off-by: David Howells Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 1 + mm/util.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index 26b6f6a66f83..0c88c0a1a72b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); diff --git a/mm/util.c b/mm/util.c index 1a41553db866..8c755d05d4e6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(kstrdup_const); * @s: the string to duplicate * @max: read at most @max chars from @s * @gfp: the GFP mask used in the kmalloc() call when allocating memory + * + * Note: Use kmemdup_nul() instead if the size is known exactly. */ char *kstrndup(const char *s, size_t max, gfp_t gfp) { @@ -117,6 +119,28 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) } EXPORT_SYMBOL(kmemdup); +/** + * kmemdup_nul - Create a NUL-terminated string from unterminated data + * @s: The data to stringify + * @len: The size of the data + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kmemdup_nul(const char *s, size_t len, gfp_t gfp) +{ + char *buf; + + if (!s) + return NULL; + + buf = kmalloc_track_caller(len + 1, gfp); + if (buf) { + memcpy(buf, s, len); + buf[len] = '\0'; + } + return buf; +} +EXPORT_SYMBOL(kmemdup_nul); + /** * memdup_user - duplicate memory region from user space * From fe1cb580e84865f3c6be99b952a626323228e781 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 28 Nov 2017 18:51:12 -0500 Subject: [PATCH 573/705] selinux: ensure the context is NUL terminated in security_context_to_sid_core() commit ef28df55ac27e1e5cd122e19fa311d886d47a756 upstream. The syzbot/syzkaller automated tests found a problem in security_context_to_sid_core() during early boot (before we load the SELinux policy) where we could potentially feed context strings without NUL terminators into the strcmp() function. We already guard against this during normal operation (after the SELinux policy has been loaded) by making a copy of the context strings and explicitly adding a NUL terminator to the end. The patch extends this protection to the early boot case (no loaded policy) by moving the context copy earlier in security_context_to_sid_core(). Reported-by: syzbot Signed-off-by: Paul Moore Reviewed-By: William Roberts Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/services.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 082b20c78363..a2aeb1c99667 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1400,27 +1400,25 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, if (!scontext_len) return -EINVAL; + /* Copy the string to allow changes and ensure a NUL terminator */ + scontext2 = kmemdup_nul(scontext, scontext_len, gfp_flags); + if (!scontext2) + return -ENOMEM; + if (!ss_initialized) { int i; for (i = 1; i < SECINITSID_NUM; i++) { - if (!strcmp(initial_sid_to_string[i], scontext)) { + if (!strcmp(initial_sid_to_string[i], scontext2)) { *sid = i; - return 0; + goto out; } } *sid = SECINITSID_KERNEL; - return 0; + goto out; } *sid = SECSID_NULL; - /* Copy the string so that we can modify the copy as we parse it. */ - scontext2 = kmalloc(scontext_len + 1, gfp_flags); - if (!scontext2) - return -ENOMEM; - memcpy(scontext2, scontext, scontext_len); - scontext2[scontext_len] = 0; - if (force) { /* Save another copy for storing in uninterpreted form */ rc = -ENOMEM; From 5e6f51aac15ac93605187fd11b9bc75a240a53f8 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 5 Dec 2017 17:17:43 -0500 Subject: [PATCH 574/705] selinux: skip bounded transition processing if the policy isn't loaded commit 4b14752ec4e0d87126e636384cf37c8dd9df157c upstream. We can't do anything reasonable in security_bounded_transition() if we don't have a policy loaded, and in fact we could run into problems with some of the code inside expecting a policy. Fix these problems like we do many others in security/selinux/ss/services.c by checking to see if the policy is loaded (ss_initialized) and returning quickly if it isn't. Reported-by: syzbot Signed-off-by: Paul Moore Acked-by: Stephen Smalley Reviewed-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/services.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index a2aeb1c99667..73275a92f2e2 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -854,6 +854,9 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) int index; int rc; + if (!ss_initialized) + return 0; + read_lock(&policy_rwlock); rc = -EINVAL; From eda4a83657b8e289560b40bb2dc33ed3b654142f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Dec 2017 16:40:26 -0800 Subject: [PATCH 575/705] crypto: x86/twofish-3way - Fix %rbp usage commit d8c7fe9f2a486a6e5f0d5229ca43807af5ab22c6 upstream. Using %rbp as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. In twofish-3way, we can't simply replace %rbp with another register because there are none available. Instead, we use the stack to hold the values that %rbp, %r11, and %r12 were holding previously. Each of these values represents the half of the output from the previous Feistel round that is being passed on unchanged to the following round. They are only used once per round, when they are exchanged with %rax, %rbx, and %rcx. As a result, we free up 3 registers (one per block) and can reassign them so that %rbp is not used, and additionally %r14 and %r15 are not used so they do not need to be saved/restored. There may be a small overhead caused by replacing 'xchg REG, REG' with the needed sequence 'mov MEM, REG; mov REG, MEM; mov REG, REG' once per round. But, counterintuitively, when I tested "ctr-twofish-3way" on a Haswell processor, the new version was actually about 2% faster. (Perhaps 'xchg' is not as well optimized as plain moves.) Reported-by: syzbot Signed-off-by: Eric Biggers Reviewed-by: Josh Poimboeuf Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 112 ++++++++++--------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 1c3b7ceb36d2..e7273a606a07 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -55,29 +55,31 @@ #define RAB1bl %bl #define RAB2bl %cl +#define CD0 0x0(%rsp) +#define CD1 0x8(%rsp) +#define CD2 0x10(%rsp) + +# used only before/after all rounds #define RCD0 %r8 #define RCD1 %r9 #define RCD2 %r10 -#define RCD0d %r8d -#define RCD1d %r9d -#define RCD2d %r10d +# used only during rounds +#define RX0 %r8 +#define RX1 %r9 +#define RX2 %r10 -#define RX0 %rbp -#define RX1 %r11 -#define RX2 %r12 +#define RX0d %r8d +#define RX1d %r9d +#define RX2d %r10d -#define RX0d %ebp -#define RX1d %r11d -#define RX2d %r12d +#define RY0 %r11 +#define RY1 %r12 +#define RY2 %r13 -#define RY0 %r13 -#define RY1 %r14 -#define RY2 %r15 - -#define RY0d %r13d -#define RY1d %r14d -#define RY2d %r15d +#define RY0d %r11d +#define RY1d %r12d +#define RY2d %r13d #define RT0 %rdx #define RT1 %rsi @@ -85,6 +87,8 @@ #define RT0d %edx #define RT1d %esi +#define RT1bl %sil + #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ movzbl ab ## bl, tmp2 ## d; \ movzbl ab ## bh, tmp1 ## d; \ @@ -92,6 +96,11 @@ op1##l T0(CTX, tmp2, 4), dst ## d; \ op2##l T1(CTX, tmp1, 4), dst ## d; +#define swap_ab_with_cd(ab, cd, tmp) \ + movq cd, tmp; \ + movq ab, cd; \ + movq tmp, ab; + /* * Combined G1 & G2 function. Reordered with help of rotates to have moves * at begining. @@ -110,15 +119,15 @@ /* G1,2 && G2,2 */ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ - xchgq cd ## 0, ab ## 0; \ + swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ - xchgq cd ## 1, ab ## 1; \ + swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ - xchgq cd ## 2, ab ## 2; + swap_ab_with_cd(ab ## 2, cd ## 2, RT0); #define enc_round_end(ab, x, y, n) \ addl y ## d, x ## d; \ @@ -168,6 +177,16 @@ decrypt_round3(ba, dc, (n*2)+1); \ decrypt_round3(ba, dc, (n*2)); +#define push_cd() \ + pushq RCD2; \ + pushq RCD1; \ + pushq RCD0; + +#define pop_cd() \ + popq RCD0; \ + popq RCD1; \ + popq RCD2; + #define inpack3(in, n, xy, m) \ movq 4*(n)(in), xy ## 0; \ xorq w+4*m(CTX), xy ## 0; \ @@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way) * %rdx: src, RIO * %rcx: bool, if true: xor output */ - pushq %r15; - pushq %r14; pushq %r13; pushq %r12; - pushq %rbp; pushq %rbx; pushq %rcx; /* bool xor */ @@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way) inpack_enc3(); - encrypt_cycle3(RAB, RCD, 0); - encrypt_cycle3(RAB, RCD, 1); - encrypt_cycle3(RAB, RCD, 2); - encrypt_cycle3(RAB, RCD, 3); - encrypt_cycle3(RAB, RCD, 4); - encrypt_cycle3(RAB, RCD, 5); - encrypt_cycle3(RAB, RCD, 6); - encrypt_cycle3(RAB, RCD, 7); + push_cd(); + encrypt_cycle3(RAB, CD, 0); + encrypt_cycle3(RAB, CD, 1); + encrypt_cycle3(RAB, CD, 2); + encrypt_cycle3(RAB, CD, 3); + encrypt_cycle3(RAB, CD, 4); + encrypt_cycle3(RAB, CD, 5); + encrypt_cycle3(RAB, CD, 6); + encrypt_cycle3(RAB, CD, 7); + pop_cd(); popq RIO; /* dst */ - popq %rbp; /* bool xor */ + popq RT1; /* bool xor */ - testb %bpl, %bpl; + testb RT1bl, RT1bl; jnz .L__enc_xor3; outunpack_enc3(mov); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; .L__enc_xor3: outunpack_enc3(xor); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; ENDPROC(__twofish_enc_blk_3way) @@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way) * %rsi: dst * %rdx: src, RIO */ - pushq %r15; - pushq %r14; pushq %r13; pushq %r12; - pushq %rbp; pushq %rbx; pushq %rsi; /* dst */ inpack_dec3(); - decrypt_cycle3(RAB, RCD, 7); - decrypt_cycle3(RAB, RCD, 6); - decrypt_cycle3(RAB, RCD, 5); - decrypt_cycle3(RAB, RCD, 4); - decrypt_cycle3(RAB, RCD, 3); - decrypt_cycle3(RAB, RCD, 2); - decrypt_cycle3(RAB, RCD, 1); - decrypt_cycle3(RAB, RCD, 0); + push_cd(); + decrypt_cycle3(RAB, CD, 7); + decrypt_cycle3(RAB, CD, 6); + decrypt_cycle3(RAB, CD, 5); + decrypt_cycle3(RAB, CD, 4); + decrypt_cycle3(RAB, CD, 3); + decrypt_cycle3(RAB, CD, 2); + decrypt_cycle3(RAB, CD, 1); + decrypt_cycle3(RAB, CD, 0); + pop_cd(); popq RIO; /* dst */ outunpack_dec3(); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; ENDPROC(twofish_dec_blk_3way) From 458d2fc92405836fc949c6779170dad18a508f0f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Jan 2018 11:14:08 -0800 Subject: [PATCH 576/705] staging: android: ion: Add __GFP_NOWARN for system contig heap commit 0c75f10312a35b149b2cebb1832316b35c2337ca upstream. syzbot reported a warning from Ion: WARNING: CPU: 1 PID: 3485 at mm/page_alloc.c:3926 ... __alloc_pages_nodemask+0x9fb/0xd80 mm/page_alloc.c:4252 alloc_pages_current+0xb6/0x1e0 mm/mempolicy.c:2036 alloc_pages include/linux/gfp.h:492 [inline] ion_system_contig_heap_allocate+0x40/0x2c0 drivers/staging/android/ion/ion_system_heap.c:374 ion_buffer_create drivers/staging/android/ion/ion.c:93 [inline] ion_alloc+0x2c1/0x9e0 drivers/staging/android/ion/ion.c:420 ion_ioctl+0x26d/0x380 drivers/staging/android/ion/ion-ioctl.c:84 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686 SYSC_ioctl fs/ioctl.c:701 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692 This is a warning about attempting to allocate order > MAX_ORDER. This is coming from a userspace Ion allocation request. Since userspace is free to request however much memory it wants (and the kernel is free to deny its allocation), silence the allocation attempt with __GFP_NOWARN in case it fails. Reported-by: syzbot+76e7efc4748495855a4d@syzkaller.appspotmail.com Reported-by: syzbot Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_system_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index 7e023d505af8..d270a424ecac 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -384,7 +384,7 @@ static int ion_system_contig_heap_allocate(struct ion_heap *heap, if (align > (PAGE_SIZE << order)) return -EINVAL; - page = alloc_pages(low_order_gfp_flags, order); + page = alloc_pages(low_order_gfp_flags | __GFP_NOWARN, order); if (!page) return -ENOMEM; From 3ee287d35b25076b626898284d610465145aa43b Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Jan 2018 11:14:09 -0800 Subject: [PATCH 577/705] staging: android: ion: Switch from WARN to pr_warn commit e4e179a844f52e907e550f887d0a2171f1508af1 upstream. Syzbot reported a warning with Ion: WARNING: CPU: 0 PID: 3502 at drivers/staging/android/ion/ion-ioctl.c:73 ion_ioctl+0x2db/0x380 drivers/staging/android/ion/ion-ioctl.c:73 Kernel panic - not syncing: panic_on_warn set ... This is a warning that validation of the ioctl fields failed. This was deliberately added as a warning to make it very obvious to developers that something needed to be fixed. In reality, this is overkill and disturbs fuzzing. Switch to pr_warn for a message instead. Reported-by: syzbot+fa2d5f63ee5904a0115a@syzkaller.appspotmail.com Reported-by: syzbot Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion-ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion-ioctl.c b/drivers/staging/android/ion/ion-ioctl.c index 7e7431d8d49f..2b700e8455c6 100644 --- a/drivers/staging/android/ion/ion-ioctl.c +++ b/drivers/staging/android/ion/ion-ioctl.c @@ -83,8 +83,10 @@ long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EFAULT; ret = validate_ioctl_arg(cmd, &data); - if (WARN_ON_ONCE(ret)) + if (ret) { + pr_warn_once("%s: ioctl validate failed\n", __func__); return ret; + } if (!(dir & _IOC_WRITE)) memset(&data, 0, sizeof(data)); From 7abb5e9dd5683d693376635ff05ec729216fd726 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sun, 14 Jan 2018 17:00:48 -0500 Subject: [PATCH 578/705] blk_rq_map_user_iov: fix error override commit 69e0927b3774563c19b5fb32e91d75edc147fb62 upstream. During stress tests by syzkaller on the sg driver the block layer infrequently returns EINVAL. Closer inspection shows the block layer was trying to return ENOMEM (which is much more understandable) but for some reason overroad that useful error. Patch below does not show this (unchanged) line: ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy); That 'ret' was being overridden when that function failed. Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 27fd8d92892d..a8b4f526d8bb 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -116,7 +116,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); struct bio *bio = NULL; struct iov_iter i; - int ret; + int ret = -EINVAL; if (!iter_is_iovec(iter)) goto fail; @@ -145,7 +145,7 @@ unmap_rq: __blk_rq_unmap_user(bio); fail: rq->bio = NULL; - return -EINVAL; + return ret; } EXPORT_SYMBOL(blk_rq_map_user_iov); From 9748fd5ba546bb93d403208d2fd40dd5fc557b6b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 13 Dec 2017 10:46:40 +0100 Subject: [PATCH 579/705] KVM: x86: fix escape of guest dr6 to the host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit efdab992813fb2ed825745625b83c05032e9cda2 upstream. syzkaller reported: WARNING: CPU: 0 PID: 12927 at arch/x86/kernel/traps.c:780 do_debug+0x222/0x250 CPU: 0 PID: 12927 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #16 RIP: 0010:do_debug+0x222/0x250 Call Trace: <#DB> debug+0x3e/0x70 RIP: 0010:copy_user_enhanced_fast_string+0x10/0x20 _copy_from_user+0x5b/0x90 SyS_timer_create+0x33/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The testcase sets a watchpoint (with perf_event_open) on a buffer that is passed to timer_create() as the struct sigevent argument. In timer_create(), copy_from_user()'s rep movsb triggers the BP. The testcase also sets the debug registers for the guest. However, KVM only restores host debug registers when the host has active watchpoints, which triggers a race condition when running the testcase with multiple threads. The guest's DR6.BS bit can escape to the host before another thread invokes timer_create(), and do_debug() complains. The fix is to respect do_debug()'s dr6 invariant when leaving KVM. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: David Hildenbrand Cc: Dmitry Vyukov Reviewed-by: David Hildenbrand Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75f756eac979..8a40165b1b1e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2847,6 +2847,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); + /* + * If userspace has set any breakpoints or watchpoints, dr6 is restored + * on every vmexit, but if not, we might have a stale dr6 from the + * guest. do_debug expects dr6 to be cleared after it runs, do the same. + */ + set_debugreg(0, 6); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, From c33f9272eefe349fed598999943e626fcef8a708 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:40:28 -0800 Subject: [PATCH 580/705] kcov: detect double association with a single task commit a77660d231f8b3d84fd23ed482e0964f7aa546d6 upstream. Currently KCOV_ENABLE does not check if the current task is already associated with another kcov descriptor. As the result it is possible to associate a single task with more than one kcov descriptor, which later leads to a memory leak of the old descriptor. This relation is really meant to be one-to-one (task has only one back link). Extend validation to detect such misuse. Link: http://lkml.kernel.org/r/20180122082520.15716-1-dvyukov@google.com Fixes: 5c9a8750a640 ("kernel: add kcov code coverage") Signed-off-by: Dmitry Vyukov Reported-by: Shankara Pailoor Cc: Dmitry Vyukov Cc: syzbot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 3cbb0c879705..3883df58aa12 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -220,9 +220,9 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || kcov->area == NULL) return -EINVAL; - if (kcov->t != NULL) - return -EBUSY; t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; From 1099c708d7b7bcb701ac9f892d8f26811842abdd Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 28 Dec 2017 09:48:54 +0100 Subject: [PATCH 581/705] netfilter: x_tables: fix int overflow in xt_alloc_table_info() commit 889c604fd0b5f6d3b8694ade229ee44124de1127 upstream. syzkaller triggered OOM kills by passing ipt_replace.size = -1 to IPT_SO_SET_REPLACE. The root cause is that SMP_ALIGN() in xt_alloc_table_info() causes int overflow and the size check passes when it should not. SMP_ALIGN() is no longer needed leftover. Remove SMP_ALIGN() call in xt_alloc_table_info(). Reported-by: syzbot+4396883fa8c4f64e0175@syzkaller.appspotmail.com Signed-off-by: Dmitry Vyukov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fc4977456c30..3de763aa143d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -39,8 +39,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); -#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) - struct compat_delta { unsigned int offset; /* offset in kernel */ int delta; /* delta in 32bit user land */ @@ -952,7 +950,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) return NULL; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) + if ((size >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) From b39f3f381bf3ba387d62655539a8979085cb6ff6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Jan 2018 17:16:09 -0800 Subject: [PATCH 582/705] netfilter: x_tables: avoid out-of-bounds reads in xt_request_find_{match|target} commit da17c73b6eb74aad3c3c0654394635675b623b3e upstream. It looks like syzbot found its way into netfilter territory. Issue here is that @name comes from user space and might not be null terminated. Out-of-bound reads happen, KASAN is not happy. v2 added similar fix for xt_request_find_target(), as Florian advised. Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 3de763aa143d..e47ade305a46 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -207,6 +207,9 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) { struct xt_match *match; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + match = xt_find_match(nfproto, name, revision); if (IS_ERR(match)) { request_module("%st_%s", xt_prefix[nfproto], name); @@ -249,6 +252,9 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + target = xt_find_target(af, name, revision); if (IS_ERR(target)) { request_module("%st_%s", xt_prefix[af], name); From ab2b0f7b23447d138be6c4147ec5d2596a94c5ce Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 30 Jan 2018 15:21:34 +0100 Subject: [PATCH 583/705] netfilter: ipt_CLUSTERIP: fix out-of-bounds accesses in clusterip_tg_check() commit 1a38956cce5eabd7b74f94bab70265e4df83165e upstream. Commit 136e92bbec0a switched local_nodes from an array to a bitmask but did not add proper bounds checks. As the result clusterip_config_init_nodelist() can both over-read ipt_clusterip_tgt_info.local_nodes and over-write clusterip_config.local_nodes. Add bounds checks for both. Fixes: 136e92bbec0a ("[NETFILTER] CLUSTERIP: use a bitmap to store node responsibility data") Signed-off-by: Dmitry Vyukov Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4a9e6db9df8d..16599bae11dd 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -365,7 +365,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; - int ret; + int ret, i; if (par->nft_compat) { pr_err("cannot use CLUSTERIP target from nftables compat\n"); @@ -384,8 +384,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) pr_info("Please specify destination IP\n"); return -EINVAL; } - - /* FIXME: further sanity checks */ + if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) { + pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes); + return -EINVAL; + } + for (i = 0; i < cipinfo->num_local_nodes; i++) { + if (cipinfo->local_nodes[i] - 1 >= + sizeof(config->local_nodes) * 8) { + pr_info("bad local_nodes[%d] %u\n", + i, cipinfo->local_nodes[i]); + return -EINVAL; + } + } config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); if (!config) { From 4ec264d8128958e66d048f45fd1c4c28cfedb119 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2018 19:01:40 +0100 Subject: [PATCH 584/705] netfilter: on sockopt() acquire sock lock only in the required scope commit 3f34cfae1238848fd53f25e5c8fd59da57901f4b upstream. Syzbot reported several deadlocks in the netfilter area caused by rtnl lock and socket lock being acquired with a different order on different code paths, leading to backtraces like the following one: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc9+ #212 Not tainted ------------------------------------------------------ syzkaller041579/3682 is trying to acquire lock: (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] lock_sock include/net/sock.h:1463 [inline] (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 but task is already holding lock: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 register_netdevice_notifier+0xad/0x860 net/core/dev.c:1607 tee_tg_check+0x1a0/0x280 net/netfilter/xt_TEE.c:106 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:845 check_target net/ipv6/netfilter/ip6_tables.c:538 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:580 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:749 do_replace net/ipv6/netfilter/ip6_tables.c:1165 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1691 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x115/0x150 net/ipv6/ipv6_sockglue.c:928 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 -> #0 (sk_lock-AF_INET6){+.+.}: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3914 lock_sock_nested+0xc2/0x110 net/core/sock.c:2780 lock_sock include/net/sock.h:1463 [inline] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 ipv6_setsockopt+0xd7/0x150 net/ipv6/ipv6_sockglue.c:922 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(sk_lock-AF_INET6); lock(rtnl_mutex); lock(sk_lock-AF_INET6); *** DEADLOCK *** 1 lock held by syzkaller041579/3682: #0: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 The problem, as Florian noted, is that nf_setsockopt() is always called with the socket held, even if the lock itself is required only for very tight scopes and only for some operation. This patch addresses the issues moving the lock_sock() call only where really needed, namely in ipv*_getorigdst(), so that nf_setsockopt() does not need anymore to acquire both locks. Fixes: 22265a5c3c10 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Reported-by: syzbot+a4c2dc980ac1af699b36@syzkaller.appspotmail.com Suggested-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 14 ++++---------- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +++++- net/ipv6/ipv6_sockglue.c | 17 +++++------------ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 29 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 551dd393ceec..bf62fa487262 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1243,11 +1243,8 @@ int ip_setsockopt(struct sock *sk, int level, if (err == -ENOPROTOOPT && optname != IP_HDRINCL && optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && - !ip_mroute_opt(optname)) { - lock_sock(sk); + !ip_mroute_opt(optname)) err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); - release_sock(sk); - } #endif return err; } @@ -1272,12 +1269,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, if (err == -ENOPROTOOPT && optname != IP_HDRINCL && optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && - !ip_mroute_opt(optname)) { - lock_sock(sk); - err = compat_nf_setsockopt(sk, PF_INET, optname, - optval, optlen); - release_sock(sk); - } + !ip_mroute_opt(optname)) + err = compat_nf_setsockopt(sk, PF_INET, optname, optval, + optlen); #endif return err; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 0c9ded247ebb..6dc8eab0fabc 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -218,15 +218,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple; memset(&tuple, 0, sizeof(tuple)); + + lock_sock(sk); tuple.src.u3.ip = inet->inet_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; tuple.dst.u3.ip = inet->inet_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.src.l3num = PF_INET; tuple.dst.protonum = sk->sk_protocol; + release_sock(sk); /* We only do TCP and SCTP at the moment: is there a better way? */ - if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { + if (tuple.dst.protonum != IPPROTO_TCP && + tuple.dst.protonum != IPPROTO_SCTP) { pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); return -ENOPROTOOPT; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bcea985dd76b..493a32f6a5f2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -907,12 +907,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && - optname != IPV6_XFRM_POLICY) { - lock_sock(sk); - err = nf_setsockopt(sk, PF_INET6, optname, optval, - optlen); - release_sock(sk); - } + optname != IPV6_XFRM_POLICY) + err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); #endif return err; } @@ -942,12 +938,9 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && - optname != IPV6_XFRM_POLICY) { - lock_sock(sk); - err = compat_nf_setsockopt(sk, PF_INET6, optname, - optval, optlen); - release_sock(sk); - } + optname != IPV6_XFRM_POLICY) + err = compat_nf_setsockopt(sk, PF_INET6, optname, optval, + optlen); #endif return err; } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 963ee3848675..af80e97f59b5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -226,20 +226,27 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { static int ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) { - const struct inet_sock *inet = inet_sk(sk); + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; const struct ipv6_pinfo *inet6 = inet6_sk(sk); + const struct inet_sock *inet = inet_sk(sk); const struct nf_conntrack_tuple_hash *h; struct sockaddr_in6 sin6; - struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; struct nf_conn *ct; + __be32 flow_label; + int bound_dev_if; + lock_sock(sk); tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; + bound_dev_if = sk->sk_bound_dev_if; + flow_label = inet6->flow_label; + release_sock(sk); - if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) + if (tuple.dst.protonum != IPPROTO_TCP && + tuple.dst.protonum != IPPROTO_SCTP) return -ENOPROTOOPT; if (*len < 0 || (unsigned int) *len < sizeof(sin6)) @@ -257,14 +264,13 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) sin6.sin6_family = AF_INET6; sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; - sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; + sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; memcpy(&sin6.sin6_addr, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, sizeof(sin6.sin6_addr)); nf_ct_put(ct); - sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, - sk->sk_bound_dev_if); + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; } From c6f3be756b784b69253a1d4bcd72c2e891f87a74 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 31 Jan 2018 15:02:47 -0800 Subject: [PATCH 585/705] netfilter: xt_cgroup: initialize info->priv in cgroup_mt_check_v1() commit ba7cd5d95f25cc6005f687dabdb4e7a6063adda9 upstream. xt_cgroup_info_v1->priv is an internal pointer only used for kernel, we should not trust what user-space provides. Reported-by: Fixes: c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a086a914865f..0eddbd5328af 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -52,6 +52,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) return -EINVAL; } + info->priv = NULL; if (info->has_path) { cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { From 8d5c422fc709def69d574b03052c73bb6442a638 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 5 Feb 2018 14:41:45 -0800 Subject: [PATCH 586/705] netfilter: xt_RATEEST: acquire xt_rateest_mutex for hash insert commit 7dc68e98757a8eccf8ca7a53a29b896f1eef1f76 upstream. rateest_hash is supposed to be protected by xt_rateest_mutex, and, as suggested by Eric, lookup and insert should be atomic, so we should acquire the xt_rateest_mutex once for both. So introduce a non-locking helper for internal use and keep the locking one for external. Reported-by: Fixes: 5859034d7eb8 ("[NETFILTER]: x_tables: add RATEEST target") Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_RATEEST.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index dbd6c4a12b97..92cfae66743b 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -39,23 +39,31 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) hlist_add_head(&est->list, &rateest_hash[h]); } -struct xt_rateest *xt_rateest_lookup(const char *name) +static struct xt_rateest *__xt_rateest_lookup(const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); - mutex_lock(&xt_rateest_mutex); hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; - mutex_unlock(&xt_rateest_mutex); return est; } } - mutex_unlock(&xt_rateest_mutex); + return NULL; } + +struct xt_rateest *xt_rateest_lookup(const char *name) +{ + struct xt_rateest *est; + + mutex_lock(&xt_rateest_mutex); + est = __xt_rateest_lookup(name); + mutex_unlock(&xt_rateest_mutex); + return est; +} EXPORT_SYMBOL_GPL(xt_rateest_lookup); void xt_rateest_put(struct xt_rateest *est) @@ -100,8 +108,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); - est = xt_rateest_lookup(info->name); + mutex_lock(&xt_rateest_mutex); + est = __xt_rateest_lookup(info->name); if (est) { + mutex_unlock(&xt_rateest_mutex); /* * If estimator parameters are specified, they must match the * existing estimator. @@ -139,11 +149,13 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) info->est = est; xt_rateest_hash_insert(est); + mutex_unlock(&xt_rateest_mutex); return 0; err2: kfree(est); err1: + mutex_unlock(&xt_rateest_mutex); return ret; } From 4dc0159458d607c8ccf36ba91a262d7bba7d4bbc Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Nov 2017 11:11:29 -0800 Subject: [PATCH 587/705] rds: tcp: atomically purge entries from rds_tcp_conn_list during netns delete commit f10b4cff98c6977668434fbf5dd58695eeca2897 upstream. The rds_tcp_kill_sock() function parses the rds_tcp_conn_list to find the rds_connection entries marked for deletion as part of the netns deletion under the protection of the rds_tcp_conn_lock. Since the rds_tcp_conn_list tracks rds_tcp_connections (which have a 1:1 mapping with rds_conn_path), multiple tc entries in the rds_tcp_conn_list will map to a single rds_connection, and will be deleted as part of the rds_conn_destroy() operation that is done outside the rds_tcp_conn_lock. The rds_tcp_conn_list traversal done under the protection of rds_tcp_conn_lock should not leave any doomed tc entries in the list after the rds_tcp_conn_lock is released, else another concurrently executiong netns delete (for a differnt netns) thread may trip on these entries. Reported-by: syzbot Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/tcp.c | 9 +++++++-- net/rds/tcp.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 78f976d32018..d36effbf7614 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -303,7 +303,8 @@ static void rds_tcp_conn_free(void *arg) rdsdebug("freeing tc %p\n", tc); spin_lock_irqsave(&rds_tcp_conn_lock, flags); - list_del(&tc->t_tcp_node); + if (!tc->t_tcp_node_detached) + list_del(&tc->t_tcp_node); spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); kmem_cache_free(rds_tcp_conn_slab, tc); @@ -528,8 +529,12 @@ static void rds_tcp_kill_sock(struct net *net) if (net != c_net || !tc->t_sock) continue; - if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); + } else { + list_del(&tc->t_tcp_node); + tc->t_tcp_node_detached = true; + } } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 56ea6620fcf9..800e847d2e00 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -11,6 +11,7 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; + bool t_tcp_node_detached; struct rds_conn_path *t_cpath; /* t_conn_path_lock synchronizes the connection establishment between * rds_tcp_accept_one and rds_tcp_conn_path_connect From eb9c7c7d9542a33011b1e217e5a283ebf94fac01 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 12 Dec 2017 11:39:04 -0500 Subject: [PATCH 588/705] net: avoid skb_warn_bad_offload on IS_ERR commit 8d74e9f88d65af8bb2e095aff506aa6eac755ada upstream. skb_warn_bad_offload warns when packets enter the GSO stack that require skb_checksum_help or vice versa. Do not warn on arbitrary bad packets. Packet sockets can craft many. Syzkaller was able to demonstrate another one with eth_type games. In particular, suppress the warning when segmentation returns an error, which is for reasons other than checksum offload. See also commit 36c92474498a ("net: WARN if skb_checksum_help() is called on skb requiring segmentation") for context on this warning. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 67b5d4d8acb1..8898618bf341 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2763,7 +2763,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); - if (unlikely(skb_needs_check(skb, tx_path))) + if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) skb_warn_bad_offload(skb); return segs; From b392a53b11f325b30b7d54e575352a8cac4c300d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:26 -0800 Subject: [PATCH 589/705] crypto: hash - annotate algorithms taking optional key commit a208fa8f33031b9e0aba44c7d1b7e68eb0cbd29e upstream. We need to consistently enforce that keyed hashes cannot be used without setting the key. To do this we need a reliable way to determine whether a given hash algorithm is keyed or not. AF_ALG currently does this by checking for the presence of a ->setkey() method. However, this is actually slightly broken because the CRC-32 algorithms implement ->setkey() but can also be used without a key. (The CRC-32 "key" is not actually a cryptographic key but rather represents the initial state. If not overridden, then a default initial state is used.) Prepare to fix this by introducing a flag CRYPTO_ALG_OPTIONAL_KEY which indicates that the algorithm has a ->setkey() method, but it is not required to be called. Then set it on all the CRC-32 algorithms. The same also applies to the Adler-32 implementation in Lustre. Also, the cryptd and mcryptd templates have to pass through the flag from their underlying algorithm. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/crc32-arm64.c | 2 ++ arch/powerpc/crypto/crc32c-vpmsum_glue.c | 1 + arch/s390/crypto/crc32-vx.c | 3 +++ arch/sparc/crypto/crc32c_glue.c | 1 + arch/x86/crypto/crc32-pclmul_glue.c | 1 + arch/x86/crypto/crc32c-intel_glue.c | 1 + crypto/crc32_generic.c | 1 + crypto/crc32c_generic.c | 1 + crypto/cryptd.c | 7 +++---- crypto/mcryptd.c | 7 +++---- drivers/crypto/bfin_crc.c | 3 ++- .../staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c | 1 + include/linux/crypto.h | 6 ++++++ 13 files changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c index 6a37c3c6b11d..3ec568afd1d3 100644 --- a/arch/arm64/crypto/crc32-arm64.c +++ b/arch/arm64/crypto/crc32-arm64.c @@ -232,6 +232,7 @@ static struct shash_alg crc32_alg = { .cra_name = "crc32", .cra_driver_name = "crc32-arm64-hw", .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 0, .cra_ctxsize = sizeof(struct chksum_ctx), @@ -253,6 +254,7 @@ static struct shash_alg crc32c_alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-arm64-hw", .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 0, .cra_ctxsize = sizeof(struct chksum_ctx), diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index f058e0c3e4d4..fd1d6c83f0c0 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -141,6 +141,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-vpmsum", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 992e630c227b..6f4985f357c6 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -238,6 +238,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32", .cra_driver_name = "crc32-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, @@ -258,6 +259,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32be", .cra_driver_name = "crc32be-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, @@ -278,6 +280,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32c", .cra_driver_name = "crc32c-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index d1064e46efe8..8aa664638c3c 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -133,6 +133,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_alignmask = 7, diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 27226df3f7d8..c8d9cdacbf10 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -162,6 +162,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-pclmul", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 0857b1a1de3b..60a391b8c4a2 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -239,6 +239,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-intel", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index aa2a25fc7482..718cbce8d169 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -133,6 +133,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-generic", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 4c0a0e271876..372320399622 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -146,6 +146,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-generic", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct chksum_ctx), diff --git a/crypto/cryptd.c b/crypto/cryptd.c index af9ad45d1909..9b66ce7e728d 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -673,10 +673,9 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, if (err) goto out_free_inst; - type = CRYPTO_ALG_ASYNC; - if (alg->cra_flags & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_INTERNAL; - inst->alg.halg.base.cra_flags = type; + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->cra_flags & (CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = salg->digestsize; inst->alg.halg.statesize = salg->statesize; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 6e9389c8bfbd..7406ed0745be 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -516,10 +516,9 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, if (err) goto out_free_inst; - type = CRYPTO_ALG_ASYNC; - if (alg->cra_flags & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_INTERNAL; - inst->alg.halg.base.cra_flags = type; + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->cra_flags & (CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = halg->digestsize; inst->alg.halg.statesize = halg->statesize; diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 10db7df366c8..2ee11802992f 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -494,7 +494,8 @@ static struct ahash_alg algs = { .cra_driver_name = DRIVER_NAME, .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(struct bfin_crypto_crc_ctx), .cra_alignmask = 3, diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c index db0572733712..ab30a0f5129c 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c @@ -119,6 +119,7 @@ static struct shash_alg alg = { .cra_name = "adler32", .cra_driver_name = "adler32-zlib", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7cee5551625b..b0175fa29860 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -102,6 +102,12 @@ */ #define CRYPTO_ALG_INTERNAL 0x00002000 +/* + * Set if the algorithm has a ->setkey() method but can be used without + * calling it first, i.e. there is a default key. + */ +#define CRYPTO_ALG_OPTIONAL_KEY 0x00004000 + /* * Transform masks and values (for crt_flags). */ From adf26e87f4b73d1f820352b0bfa085f6aaf3f51a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:27 -0800 Subject: [PATCH 590/705] crypto: hash - prevent using keyed hashes without setting key commit 9fa68f620041be04720d0cbfb1bd3ddfc6310b24 upstream. Currently, almost none of the keyed hash algorithms check whether a key has been set before proceeding. Some algorithms are okay with this and will effectively just use a key of all 0's or some other bogus default. However, others will severely break, as demonstrated using "hmac(sha3-512-generic)", the unkeyed use of which causes a kernel crash via a (potentially exploitable) stack buffer overflow. A while ago, this problem was solved for AF_ALG by pairing each hash transform with a 'has_key' bool. However, there are still other places in the kernel where userspace can specify an arbitrary hash algorithm by name, and the kernel uses it as unkeyed hash without checking whether it is really unkeyed. Examples of this include: - KEYCTL_DH_COMPUTE, via the KDF extension - dm-verity - dm-crypt, via the ESSIV support - dm-integrity, via the "internal hash" mode with no key given - drbd (Distributed Replicated Block Device) This bug is especially bad for KEYCTL_DH_COMPUTE as that requires no privileges to call. Fix the bug for all users by adding a flag CRYPTO_TFM_NEED_KEY to the ->crt_flags of each hash transform that indicates whether the transform still needs to be keyed or not. Then, make the hash init, import, and digest functions return -ENOKEY if the key is still needed. The new flag also replaces the 'has_key' bool which algif_hash was previously using, thereby simplifying the algif_hash implementation. Reported-by: syzbot Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 22 ++++++++++++++---- crypto/algif_hash.c | 52 +++++++++--------------------------------- crypto/shash.c | 25 ++++++++++++++++---- include/crypto/hash.h | 34 +++++++++++++++++++-------- include/linux/crypto.h | 2 ++ 5 files changed, 75 insertions(+), 60 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index f3fa104de479..14402ef6d826 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -192,11 +192,18 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { unsigned long alignmask = crypto_ahash_alignmask(tfm); + int err; if ((unsigned long)key & alignmask) - return ahash_setkey_unaligned(tfm, key, keylen); + err = ahash_setkey_unaligned(tfm, key, keylen); + else + err = tfm->setkey(tfm, key, keylen); - return tfm->setkey(tfm, key, keylen); + if (err) + return err; + + crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); + return 0; } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); @@ -369,7 +376,12 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { - return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->digest); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return crypto_ahash_op(req, tfm->digest); } EXPORT_SYMBOL_GPL(crypto_ahash_digest); @@ -455,7 +467,6 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) struct ahash_alg *alg = crypto_ahash_alg(hash); hash->setkey = ahash_nosetkey; - hash->has_setkey = false; hash->export = ahash_no_export; hash->import = ahash_no_import; @@ -470,7 +481,8 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) if (alg->setkey) { hash->setkey = alg->setkey; - hash->has_setkey = true; + if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) + crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY); } if (alg->export) hash->export = alg->export; diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 54fc90e8339c..731b5fb8567b 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -34,11 +34,6 @@ struct hash_ctx { struct ahash_request req; }; -struct algif_hash_tfm { - struct crypto_ahash *hash; - bool has_key; -}; - static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx) { unsigned ds; @@ -308,7 +303,7 @@ static int hash_check_key(struct socket *sock) int err = 0; struct sock *psk; struct alg_sock *pask; - struct algif_hash_tfm *tfm; + struct crypto_ahash *tfm; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -322,7 +317,7 @@ static int hash_check_key(struct socket *sock) err = -ENOKEY; lock_sock_nested(psk, SINGLE_DEPTH_NESTING); - if (!tfm->has_key) + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) goto unlock; if (!pask->refcnt++) @@ -413,41 +408,17 @@ static struct proto_ops algif_hash_ops_nokey = { static void *hash_bind(const char *name, u32 type, u32 mask) { - struct algif_hash_tfm *tfm; - struct crypto_ahash *hash; - - tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); - if (!tfm) - return ERR_PTR(-ENOMEM); - - hash = crypto_alloc_ahash(name, type, mask); - if (IS_ERR(hash)) { - kfree(tfm); - return ERR_CAST(hash); - } - - tfm->hash = hash; - - return tfm; + return crypto_alloc_ahash(name, type, mask); } static void hash_release(void *private) { - struct algif_hash_tfm *tfm = private; - - crypto_free_ahash(tfm->hash); - kfree(tfm); + crypto_free_ahash(private); } static int hash_setkey(void *private, const u8 *key, unsigned int keylen) { - struct algif_hash_tfm *tfm = private; - int err; - - err = crypto_ahash_setkey(tfm->hash, key, keylen); - tfm->has_key = !err; - - return err; + return crypto_ahash_setkey(private, key, keylen); } static void hash_sock_destruct(struct sock *sk) @@ -462,11 +433,10 @@ static void hash_sock_destruct(struct sock *sk) static int hash_accept_parent_nokey(void *private, struct sock *sk) { - struct hash_ctx *ctx; + struct crypto_ahash *tfm = private; struct alg_sock *ask = alg_sk(sk); - struct algif_hash_tfm *tfm = private; - struct crypto_ahash *hash = tfm->hash; - unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash); + struct hash_ctx *ctx; + unsigned int len = sizeof(*ctx) + crypto_ahash_reqsize(tfm); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) @@ -479,7 +449,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk) ask->private = ctx; - ahash_request_set_tfm(&ctx->req, hash); + ahash_request_set_tfm(&ctx->req, tfm); ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); @@ -490,9 +460,9 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk) static int hash_accept_parent(void *private, struct sock *sk) { - struct algif_hash_tfm *tfm = private; + struct crypto_ahash *tfm = private; - if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash)) + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return hash_accept_parent_nokey(private, sk); diff --git a/crypto/shash.c b/crypto/shash.c index 9bd5044d467b..d5bd2f05d036 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -57,11 +57,18 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, { struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); + int err; if ((unsigned long)key & alignmask) - return shash_setkey_unaligned(tfm, key, keylen); + err = shash_setkey_unaligned(tfm, key, keylen); + else + err = shash->setkey(tfm, key, keylen); - return shash->setkey(tfm, key, keylen); + if (err) + return err; + + crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); + return 0; } EXPORT_SYMBOL_GPL(crypto_shash_setkey); @@ -180,6 +187,9 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); + if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + if (((unsigned long)data | (unsigned long)out) & alignmask) return shash_digest_unaligned(desc, data, len, out); @@ -359,7 +369,8 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->digest = shash_async_digest; crt->setkey = shash_async_setkey; - crt->has_setkey = alg->setkey != shash_no_setkey; + crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) & + CRYPTO_TFM_NEED_KEY); if (alg->export) crt->export = shash_async_export; @@ -374,8 +385,14 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) static int crypto_shash_init_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); + struct shash_alg *alg = crypto_shash_alg(hash); + + hash->descsize = alg->descsize; + + if (crypto_shash_alg_has_setkey(alg) && + !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) + crypto_shash_set_flags(hash, CRYPTO_TFM_NEED_KEY); - hash->descsize = crypto_shash_alg(hash)->descsize; return 0; } diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 26605888a199..d3de3b819ec0 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -205,7 +205,6 @@ struct crypto_ahash { unsigned int keylen); unsigned int reqsize; - bool has_setkey; struct crypto_tfm base; }; @@ -399,11 +398,6 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); -static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm) -{ - return tfm->has_setkey; -} - /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information @@ -475,7 +469,12 @@ static inline int crypto_ahash_export(struct ahash_request *req, void *out) */ static inline int crypto_ahash_import(struct ahash_request *req, const void *in) { - return crypto_ahash_reqtfm(req)->import(req, in); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return tfm->import(req, in); } /** @@ -492,7 +491,12 @@ static inline int crypto_ahash_import(struct ahash_request *req, const void *in) */ static inline int crypto_ahash_init(struct ahash_request *req) { - return crypto_ahash_reqtfm(req)->init(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return tfm->init(req); } /** @@ -845,7 +849,12 @@ static inline int crypto_shash_export(struct shash_desc *desc, void *out) */ static inline int crypto_shash_import(struct shash_desc *desc, const void *in) { - return crypto_shash_alg(desc->tfm)->import(desc, in); + struct crypto_shash *tfm = desc->tfm; + + if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return crypto_shash_alg(tfm)->import(desc, in); } /** @@ -861,7 +870,12 @@ static inline int crypto_shash_import(struct shash_desc *desc, const void *in) */ static inline int crypto_shash_init(struct shash_desc *desc) { - return crypto_shash_alg(desc->tfm)->init(desc); + struct crypto_shash *tfm = desc->tfm; + + if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return crypto_shash_alg(tfm)->init(desc); } /** diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b0175fa29860..8edb3ba6f640 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -111,6 +111,8 @@ /* * Transform masks and values (for crt_flags). */ +#define CRYPTO_TFM_NEED_KEY 0x00000001 + #define CRYPTO_TFM_REQ_MASK 0x000fff00 #define CRYPTO_TFM_RES_MASK 0xfff00000 From ec677e0687ded48154852faa7d5ff25d171a691b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:34:45 +0100 Subject: [PATCH 591/705] ASoC: ux500: add MODULE_LICENSE tag commit 1783c9d7cb7bc3181b9271665959b87280d98d8e upstream. This adds MODULE_LICENSE/AUTHOR/DESCRIPTION tags to the ux500 platform drivers, to avoid these build warnings: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/ux500/snd-soc-ux500-plat-dma.o WARNING: modpost: missing MODULE_LICENSE() in sound/soc/ux500/snd-soc-ux500-mach-mop500.o The company no longer exists, so the email addresses of the authors don't work any more, but I've added them anyway for consistency. Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/ux500/mop500.c | 4 ++++ sound/soc/ux500/ux500_pcm.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/sound/soc/ux500/mop500.c b/sound/soc/ux500/mop500.c index ba9fc099cf67..503aef8fcde2 100644 --- a/sound/soc/ux500/mop500.c +++ b/sound/soc/ux500/mop500.c @@ -164,3 +164,7 @@ static struct platform_driver snd_soc_mop500_driver = { }; module_platform_driver(snd_soc_mop500_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ASoC MOP500 board driver"); +MODULE_AUTHOR("Ola Lilja"); diff --git a/sound/soc/ux500/ux500_pcm.c b/sound/soc/ux500/ux500_pcm.c index f12c01dddc8d..d35ba7700f46 100644 --- a/sound/soc/ux500/ux500_pcm.c +++ b/sound/soc/ux500/ux500_pcm.c @@ -165,3 +165,8 @@ int ux500_pcm_unregister_platform(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(ux500_pcm_unregister_platform); + +MODULE_AUTHOR("Ola Lilja"); +MODULE_AUTHOR("Roger Nilsson"); +MODULE_DESCRIPTION("ASoC UX500 driver"); +MODULE_LICENSE("GPL v2"); From fc428560984ce1705a092fe94305689849ccb7f6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 17:04:22 +0100 Subject: [PATCH 592/705] video: fbdev/mmp: add MODULE_LICENSE commit c1530ac5a3ce93a1f02adabc4508b5fbf862dfe2 upstream. Kbuild complains about the lack of a license tag in this driver: WARNING: modpost: missing MODULE_LICENSE() in drivers/video/fbdev/mmp/mmp_disp.o This adds the license, author and description tags. Signed-off-by: Arnd Bergmann Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/mmp/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/mmp/core.c b/drivers/video/fbdev/mmp/core.c index a0f496049db7..3a6bb6561ba0 100644 --- a/drivers/video/fbdev/mmp/core.c +++ b/drivers/video/fbdev/mmp/core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include