From f680f70adbeab28b35f849016b964dd645db6237 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Tue, 3 Nov 2015 11:51:33 +0530 Subject: [PATCH 001/110] ath10k: fix invalid NSS for 4x4 devices The number of spatial streams that are derived from chain mask for 4x4 devices is using wrong bitmask and conditional check. This is affecting downlink throughput for QCA99x0 devices. Earlier cfg_tx_chainmask is not filled by default until user configured it and so get_nss_from_chainmask never be called. This issue is exposed by recent commit 166de3f1895d ("ath10k: remove supported chain mask"). By default maximum supported chain mask is filled in cfg_tx_chainmask. Cc: stable@vger.kernel.org Fixes: 5572a95b4b ("ath10k: apply chainmask settings to vdev on creation") Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index a7411fe90cc4..95a55405ebf0 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4225,7 +4225,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed) static u32 get_nss_from_chainmask(u16 chain_mask) { - if ((chain_mask & 0x15) == 0x15) + if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; From 7883746bc663150e8acd7a57397fc889698b0b33 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 21:47:12 +0200 Subject: [PATCH 002/110] Bluetooth: Fix l2cap_chan leak in SMP The L2CAP core expects channel implementations to manage the reference returned by the new_connection callback. With sockets this is already handled with each channel being tied to the corresponding socket. With SMP however there's no context to tie the pointer to in the smp_new_conn_cb function. The function can also not just drop the reference since it's the only one at that point. For fixed channels (like SMP) the code path inside the L2CAP core from new_connection() to ready() is short and straight-forwards. The crucial difference is that in ready() the implementation has access to the l2cap_conn that SMP needs associate its l2cap_chan. Instead of taking a new reference in smp_ready_cb() we can simply assume to already own the reference created in smp_new_conn_cb(), i.e. there is no need to call l2cap_chan_hold(). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 3.19+ --- net/bluetooth/smp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c91353841e40..ffed8a1d4f27 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3027,8 +3027,13 @@ static void smp_ready_cb(struct l2cap_chan *chan) BT_DBG("chan %p", chan); + /* No need to call l2cap_chan_hold() here since we already own + * the reference taken in smp_new_conn_cb(). This is just the + * first time that we tie it to a specific pointer. The code in + * l2cap_core.c ensures that there's no risk this function wont + * get called if smp_new_conn_cb was previously called. + */ conn->smp = chan; - l2cap_chan_hold(chan); if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags)) bredr_pairing(chan); From 12551ced30bb4658496ba76b7998dc6930b45722 Mon Sep 17 00:00:00 2001 From: Bartosz Markowski Date: Thu, 5 Nov 2015 09:50:40 +0100 Subject: [PATCH 003/110] ath10k: fix the currently supported QCA9377 target version name When introducing the original QCA9377 support, the chip target version was wrongly picked. The chip advertising itself with bmi target value equal to 0x05020001 is in fact a 1.1 revision. I realized this once I got a real 1.1 hw to play with. Signed-off-by: Bartosz Markowski Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 4 ++-- drivers/net/wireless/ath/ath10k/hw.h | 8 ++++++-- drivers/net/wireless/ath/ath10k/pci.c | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index aa9bd92ac4ed..c8f6ca284b9d 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -138,8 +138,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, }, { - .id = QCA9377_HW_1_0_DEV_VERSION, - .name = "qca9377 hw1.0", + .id = QCA9377_HW_1_1_DEV_VERSION, + .name = "qca9377 hw1.1", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .otp_exe_param = 0, diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 39966a05c1cc..01bf2244f54c 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -42,6 +42,8 @@ #define QCA6174_HW_3_0_VERSION 0x05020000 #define QCA6174_HW_3_2_VERSION 0x05030000 +#define QCA9377_HW_1_1_DEV_VERSION 0x05020001 + enum qca6174_pci_rev { QCA6174_PCI_REV_1_1 = 0x11, QCA6174_PCI_REV_1_3 = 0x13, @@ -60,6 +62,10 @@ enum qca6174_chip_id_rev { QCA6174_HW_3_2_CHIP_ID_REV = 10, }; +enum qca9377_chip_id_rev { + QCA9377_HW_1_1_CHIP_ID_REV = 0x1, +}; + #define QCA6174_HW_2_1_FW_DIR "ath10k/QCA6174/hw2.1" #define QCA6174_HW_2_1_FW_FILE "firmware.bin" #define QCA6174_HW_2_1_OTP_FILE "otp.bin" @@ -85,8 +91,6 @@ enum qca6174_chip_id_rev { #define QCA99X0_HW_2_0_PATCH_LOAD_ADDR 0x1234 /* QCA9377 1.0 definitions */ -#define QCA9377_HW_1_0_DEV_VERSION 0x05020001 -#define QCA9377_HW_1_0_CHIP_ID_REV 0x1 #define QCA9377_HW_1_0_FW_DIR ATH10K_FW_DIR "/QCA9377/hw1.0" #define QCA9377_HW_1_0_FW_FILE "firmware.bin" #define QCA9377_HW_1_0_OTP_FILE "otp.bin" diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 3fca200b986c..c444b43f183d 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -92,7 +92,7 @@ static const struct ath10k_pci_supp_chip ath10k_pci_supp_chips[] = { { QCA6174_2_1_DEVICE_ID, QCA6174_HW_3_2_CHIP_ID_REV }, { QCA99X0_2_0_DEVICE_ID, QCA99X0_HW_2_0_CHIP_ID_REV }, - { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_0_CHIP_ID_REV }, + { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_1_CHIP_ID_REV }, }; static void ath10k_pci_buffer_cleanup(struct ath10k *ar); From 6cf213958299803c1166e63c1805a8b8cd135be5 Mon Sep 17 00:00:00 2001 From: Bartosz Markowski Date: Thu, 5 Nov 2015 09:50:41 +0100 Subject: [PATCH 004/110] ath10k: update missing hw_params of QCA9377 hw1.1 The uart_pin was incorrectly configured for QCA9377 and the recently added hw_params were omitted. Signed-off-by: Bartosz Markowski Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index c8f6ca284b9d..a30d41559134 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -141,8 +141,10 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .id = QCA9377_HW_1_1_DEV_VERSION, .name = "qca9377 hw1.1", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, - .uart_pin = 7, + .uart_pin = 6, .otp_exe_param = 0, + .channel_counters_freq_hz = 88000, + .max_probe_resp_desc_thres = 0, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .fw = QCA9377_HW_1_0_FW_FILE, From 079a0490e207c5a88e4b40cefcc331f4bce562f4 Mon Sep 17 00:00:00 2001 From: Bartosz Markowski Date: Thu, 5 Nov 2015 09:50:42 +0100 Subject: [PATCH 005/110] ath10k: introduce dev_id to hw_params A follow up patch introducing a QCA9377 hw1.0 support will need this device identification helper for an explicit distinction of HWs, as apparently both QCA6174 hw3.0 and QCA9377 share the same BMI target version (0x0502000x). For the QCA9377 hw1.1 previously added we were just lucky we did not overlap with the same chip_id_rev. Signed-off-by: Bartosz Markowski Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 27 +++++++++++++++++++++++++- drivers/net/wireless/ath/ath10k/core.h | 1 + drivers/net/wireless/ath/ath10k/hw.h | 6 ++++++ drivers/net/wireless/ath/ath10k/pci.c | 6 ------ 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index a30d41559134..b80b8f372354 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -51,6 +51,7 @@ MODULE_PARM_DESC(rawmode, "Use raw 802.11 frame datapath"); static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = QCA988X_HW_2_0_VERSION, + .dev_id = QCA988X_2_0_DEVICE_ID, .name = "qca988x hw2.0", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, @@ -69,6 +70,25 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA6174_HW_2_1_VERSION, + .dev_id = QCA6164_2_1_DEVICE_ID, + .name = "qca6164 hw2.1", + .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, + .uart_pin = 6, + .otp_exe_param = 0, + .channel_counters_freq_hz = 88000, + .max_probe_resp_desc_thres = 0, + .fw = { + .dir = QCA6174_HW_2_1_FW_DIR, + .fw = QCA6174_HW_2_1_FW_FILE, + .otp = QCA6174_HW_2_1_OTP_FILE, + .board = QCA6174_HW_2_1_BOARD_DATA_FILE, + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, + }, + { + .id = QCA6174_HW_2_1_VERSION, + .dev_id = QCA6174_2_1_DEVICE_ID, .name = "qca6174 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -86,6 +106,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA6174_HW_3_0_VERSION, + .dev_id = QCA6174_2_1_DEVICE_ID, .name = "qca6174 hw3.0", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -103,6 +124,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA6174_HW_3_2_VERSION, + .dev_id = QCA6174_2_1_DEVICE_ID, .name = "qca6174 hw3.2", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -121,6 +143,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA99X0_HW_2_0_DEV_VERSION, + .dev_id = QCA99X0_2_0_DEVICE_ID, .name = "qca99x0 hw2.0", .patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, @@ -139,6 +162,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA9377_HW_1_1_DEV_VERSION, + .dev_id = QCA9377_1_0_DEVICE_ID, .name = "qca9377 hw1.1", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -1265,7 +1289,8 @@ static int ath10k_init_hw_params(struct ath10k *ar) for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) { hw_params = &ath10k_hw_params_list[i]; - if (hw_params->id == ar->target_version) + if (hw_params->id == ar->target_version && + hw_params->dev_id == ar->dev_id) break; } diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 4a2301589902..622d3816eaf4 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -636,6 +636,7 @@ struct ath10k { struct ath10k_hw_params { u32 id; + u16 dev_id; const char *name; u32 patch_load_addr; int uart_pin; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 01bf2244f54c..13aacbdc98e6 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -22,6 +22,12 @@ #define ATH10K_FW_DIR "ath10k" +#define QCA988X_2_0_DEVICE_ID (0x003c) +#define QCA6164_2_1_DEVICE_ID (0x0041) +#define QCA6174_2_1_DEVICE_ID (0x003e) +#define QCA99X0_2_0_DEVICE_ID (0x0040) +#define QCA9377_1_0_DEVICE_ID (0x0042) + /* QCA988X 1.0 definitions (unsupported) */ #define QCA988X_HW_1_0_CHIP_ID_REV 0x0 diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index c444b43f183d..14dce2a1b39f 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -57,12 +57,6 @@ MODULE_PARM_DESC(reset_mode, "0: auto, 1: warm only (default: 0)"); #define ATH10K_PCI_TARGET_WAIT 3000 #define ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS 3 -#define QCA988X_2_0_DEVICE_ID (0x003c) -#define QCA6164_2_1_DEVICE_ID (0x0041) -#define QCA6174_2_1_DEVICE_ID (0x003e) -#define QCA99X0_2_0_DEVICE_ID (0x0040) -#define QCA9377_1_0_DEVICE_ID (0x0042) - static const struct pci_device_id ath10k_pci_id_table[] = { { PCI_VDEVICE(ATHEROS, QCA988X_2_0_DEVICE_ID) }, /* PCI-E QCA988X V2 */ { PCI_VDEVICE(ATHEROS, QCA6164_2_1_DEVICE_ID) }, /* PCI-E QCA6164 V2.1 */ From 034074f3a889b69325326e612b7b37f3492a65ad Mon Sep 17 00:00:00 2001 From: Bartosz Markowski Date: Thu, 5 Nov 2015 09:50:43 +0100 Subject: [PATCH 006/110] ath10k: add QCA9377 hw1.0 support Add new BMI target version and chip id revision. Register it on supported chips list. Signed-off-by: Bartosz Markowski Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 18 ++++++++++++++++++ drivers/net/wireless/ath/ath10k/hw.h | 3 +++ drivers/net/wireless/ath/ath10k/pci.c | 2 ++ 3 files changed, 23 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index b80b8f372354..0947cc271e69 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -160,6 +160,24 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, }, + { + .id = QCA9377_HW_1_0_DEV_VERSION, + .dev_id = QCA9377_1_0_DEVICE_ID, + .name = "qca9377 hw1.0", + .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, + .uart_pin = 6, + .otp_exe_param = 0, + .channel_counters_freq_hz = 88000, + .max_probe_resp_desc_thres = 0, + .fw = { + .dir = QCA9377_HW_1_0_FW_DIR, + .fw = QCA9377_HW_1_0_FW_FILE, + .otp = QCA9377_HW_1_0_OTP_FILE, + .board = QCA9377_HW_1_0_BOARD_DATA_FILE, + .board_size = QCA9377_BOARD_DATA_SZ, + .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, + }, + }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 13aacbdc98e6..713c2bcea178 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -48,6 +48,8 @@ #define QCA6174_HW_3_0_VERSION 0x05020000 #define QCA6174_HW_3_2_VERSION 0x05030000 +/* QCA9377 target BMI version signatures */ +#define QCA9377_HW_1_0_DEV_VERSION 0x05020000 #define QCA9377_HW_1_1_DEV_VERSION 0x05020001 enum qca6174_pci_rev { @@ -69,6 +71,7 @@ enum qca6174_chip_id_rev { }; enum qca9377_chip_id_rev { + QCA9377_HW_1_0_CHIP_ID_REV = 0x0, QCA9377_HW_1_1_CHIP_ID_REV = 0x1, }; diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 14dce2a1b39f..679a3ebb34af 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -86,6 +86,8 @@ static const struct ath10k_pci_supp_chip ath10k_pci_supp_chips[] = { { QCA6174_2_1_DEVICE_ID, QCA6174_HW_3_2_CHIP_ID_REV }, { QCA99X0_2_0_DEVICE_ID, QCA99X0_HW_2_0_CHIP_ID_REV }, + + { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_0_CHIP_ID_REV }, { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_1_CHIP_ID_REV }, }; From 2727a743e9eeb3d4063d4077afee3fd2a5e198ca Mon Sep 17 00:00:00 2001 From: Ryan Hsu Date: Thu, 5 Nov 2015 18:44:27 -0800 Subject: [PATCH 007/110] ath10k: override CE5 configuration for QCA6147 device Commit a70587b3389a ("ath10k: configure copy engine 5 for HTT messages") introduced to use the unused CE5 for target to host message. For the device like QCA6174, CE5 already assigned for other feature. So for QCA6174, override the CE5 configuration and use the CE1 instead. This patch is based on Rajkumar's earlier patch. Fixes: a70587b3389a ("ath10k: configure copy engine 5 for HTT messages") Signed-off-by: Ryan Hsu Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 32 ++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 679a3ebb34af..0ad3dd139bf5 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -108,7 +108,7 @@ static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state); -static const struct ce_attr host_ce_config_wlan[] = { +static struct ce_attr host_ce_config_wlan[] = { /* CE0: host->target HTC control and raw streams */ { .flags = CE_ATTR_FLAGS, @@ -213,7 +213,7 @@ static const struct ce_attr host_ce_config_wlan[] = { }; /* Target firmware's Copy Engine configuration. */ -static const struct ce_pipe_config target_ce_config_wlan[] = { +static struct ce_pipe_config target_ce_config_wlan[] = { /* CE0: host->target HTC control and raw streams */ { .pipenum = __cpu_to_le32(0), @@ -326,7 +326,7 @@ static const struct ce_pipe_config target_ce_config_wlan[] = { * This table is derived from the CE_PCI TABLE, above. * It is passed to the Target at startup for use by firmware. */ -static const struct service_to_pipe target_service_to_ce_map_wlan[] = { +static struct service_to_pipe target_service_to_ce_map_wlan[] = { { __cpu_to_le32(ATH10K_HTC_SVC_ID_WMI_DATA_VO), __cpu_to_le32(PIPEDIR_OUT), /* out = UL = host -> target */ @@ -2023,6 +2023,29 @@ static int ath10k_pci_init_config(struct ath10k *ar) return 0; } +static void ath10k_pci_override_ce_config(struct ath10k *ar) +{ + struct ce_attr *attr; + struct ce_pipe_config *config; + + /* For QCA6174 we're overriding the Copy Engine 5 configuration, + * since it is currently used for other feature. + */ + + /* Override Host's Copy Engine 5 configuration */ + attr = &host_ce_config_wlan[5]; + attr->src_sz_max = 0; + attr->dest_nentries = 0; + + /* Override Target firmware's Copy Engine configuration */ + config = &target_ce_config_wlan[5]; + config->pipedir = __cpu_to_le32(PIPEDIR_OUT); + config->nbytes_max = __cpu_to_le32(2048); + + /* Map from service/endpoint to Copy Engine */ + target_service_to_ce_map_wlan[15].pipenum = __cpu_to_le32(1); +} + static int ath10k_pci_alloc_pipes(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); @@ -3016,6 +3039,9 @@ static int ath10k_pci_probe(struct pci_dev *pdev, goto err_core_destroy; } + if (QCA_REV_6174(ar)) + ath10k_pci_override_ce_config(ar); + ret = ath10k_pci_alloc_pipes(ar); if (ret) { ath10k_err(ar, "failed to allocate copy engine pipes: %d\n", From 23ba8a66234943e0b41c13ef7ca1088cf8488025 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Sun, 8 Nov 2015 14:03:17 +0200 Subject: [PATCH 008/110] MAINTAINERS: wil6210: new maintainer - Maya Erez Maya Erez will maintain the wil6210 driver Signed-off-by: Vladimir Kondratiev Signed-off-by: Kalle Valo --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5974a0f676d6..a457deb03252 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1814,7 +1814,7 @@ S: Supported F: drivers/net/wireless/ath/ath6kl/ WILOCITY WIL6210 WIRELESS DRIVER -M: Vladimir Kondratiev +M: Maya Erez L: linux-wireless@vger.kernel.org L: wil6210@qca.qualcomm.com S: Supported From 4ab75944c4b324c1f5f01dbd4c4d122d2b9da187 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Wed, 28 Oct 2015 12:32:20 +0200 Subject: [PATCH 009/110] iwlwifi: Add new PCI IDs for the 8260 series Add some new PCI IDs for the 8260 series which were missing. The following sub-system IDs were added: 0x0130, 0x1130, 0x0132, 0x1132, 0x1150, 0x8110, 0x9110, 0x8130, 0x9130, 0x8132, 0x9132, 0x8150, 0x9150, 0x0044, 0x0930 CC: [4.1+] Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/drv.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 644b58bc5226..639761fb2bfb 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -423,14 +423,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)}, @@ -438,18 +445,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)}, {IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, #endif /* CONFIG_IWLMVM */ {0} From 5fd6705c366f885471b979de95ba14411e812395 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 15 Nov 2015 11:23:39 +0200 Subject: [PATCH 010/110] iwlwifi: bump firmware API to 19 This firmware will be the first firmware to support 3168. It hasn't been released yet. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-7000.c | 2 +- drivers/net/wireless/iwlwifi/iwl-8000.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 1a73c7a1da77..bf88ec3a65fa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -69,7 +69,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL7260_UCODE_API_MAX 17 +#define IWL7260_UCODE_API_MAX 19 /* Oldest version we won't warn about */ #define IWL7260_UCODE_API_OK 13 diff --git a/drivers/net/wireless/iwlwifi/iwl-8000.c b/drivers/net/wireless/iwlwifi/iwl-8000.c index 0116e5a4c393..9bcc0bf937d8 100644 --- a/drivers/net/wireless/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/iwlwifi/iwl-8000.c @@ -69,7 +69,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL8000_UCODE_API_MAX 17 +#define IWL8000_UCODE_API_MAX 19 /* Oldest version we won't warn about */ #define IWL8000_UCODE_API_OK 13 From d6ee54a9d7c807cdb8eb77d7f019cce344c2162c Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 10 Nov 2015 22:13:43 +0200 Subject: [PATCH 011/110] iwlwifi: mvm: don't overwrite the key indices in D3 entry When entering D3, we need to use hardcoded key indices because the firmware requires that. To do so, we are overwriting the HW key index in the keyconf structure, which makes it impossible to reuse the indices that were used before entering D3. Additionally, we overwrite all the non-PTK keys with index 1, because the firmware only allows one non-PTK key to be set. This is bad, because when we resume, we may try to set more than one key with index 1, which will obviously fail. To fix this, allow the callers to set a pre-defined index to use in iwl_mvm_set_sta_key() instead of relying on the hw_key_idx value from the keyconf struct (which requires overwriting it). In normal cases, the caller can pass STA_KEY_IDX_INVALID, which will cause a new key offset to be chosen. During HW_RESTART, we pass the offset that is in use. And during D3 entry, we pass the hardcoded indices we need to use. Additionally, don't clear the fw_key_table in D3 entry, so that the flags are still set with the pre-D3 values when exiting D3. fixes=I3165c22362483f0152d9ec1d2a987fb5529727c1 Fixes: b546dcd6b742 ("iwlwifi: mvm: don't reset key index on HW restart") Signed-off-by: Luca Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/d3.c | 8 +--- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 11 ++++-- drivers/net/wireless/iwlwifi/mvm/sta.c | 44 +++++++++++++-------- drivers/net/wireless/iwlwifi/mvm/sta.h | 4 +- 4 files changed, 39 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 85ae902df7c0..29ae58ebf223 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -309,9 +309,9 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, * to transmit packets to the AP, i.e. the PTK. */ if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) { - key->hw_key_idx = 0; mvm->ptk_ivlen = key->iv_len; mvm->ptk_icvlen = key->icv_len; + ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0); } else { /* * firmware only supports TSC/RSC for a single key, @@ -319,12 +319,11 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, * with new ones -- this relies on mac80211 doing * list_add_tail(). */ - key->hw_key_idx = 1; mvm->gtk_ivlen = key->iv_len; mvm->gtk_icvlen = key->icv_len; + ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1); } - ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, true); data->error = ret != 0; out_unlock: mutex_unlock(&mvm->mutex); @@ -772,9 +771,6 @@ static int iwl_mvm_switch_to_d3(struct iwl_mvm *mvm) */ set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); - /* We reprogram keys and shouldn't allocate new key indices */ - memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table)); - mvm->ptk_ivlen = 0; mvm->ptk_icvlen = 0; mvm->ptk_ivlen = 0; diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 1fb684693040..e88afac51c5d 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -2941,6 +2941,7 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + u8 key_offset; if (iwlwifi_mod_params.sw_crypto) { IWL_DEBUG_MAC80211(mvm, "leave - hwcrypto disabled\n"); @@ -3006,10 +3007,14 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, break; } + /* in HW restart reuse the index, otherwise request a new one */ + if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + key_offset = key->hw_key_idx; + else + key_offset = STA_KEY_IDX_INVALID; + IWL_DEBUG_MAC80211(mvm, "set hwcrypto key\n"); - ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, - test_bit(IWL_MVM_STATUS_IN_HW_RESTART, - &mvm->status)); + ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset); if (ret) { IWL_WARN(mvm, "set key failed\n"); /* diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 300a249486e4..4e260083b6a0 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -1227,7 +1227,8 @@ static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif, static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvm_sta, struct ieee80211_key_conf *keyconf, bool mcast, - u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags) + u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags, + u8 key_offset) { struct iwl_mvm_add_sta_key_cmd cmd = {}; __le16 key_flags; @@ -1269,7 +1270,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm, if (mcast) key_flags |= cpu_to_le16(STA_KEY_MULTICAST); - cmd.key_offset = keyconf->hw_key_idx; + cmd.key_offset = key_offset; cmd.key_flags = key_flags; cmd.sta_id = sta_id; @@ -1360,6 +1361,7 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *keyconf, + u8 key_offset, bool mcast) { struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); @@ -1375,17 +1377,17 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm, ieee80211_get_key_rx_seq(keyconf, 0, &seq); ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k); ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - seq.tkip.iv32, p1k, 0); + seq.tkip.iv32, p1k, 0, key_offset); break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - 0, NULL, 0); + 0, NULL, 0, key_offset); break; default: ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - 0, NULL, 0); + 0, NULL, 0, key_offset); } return ret; @@ -1433,7 +1435,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *keyconf, - bool have_key_offset) + u8 key_offset) { bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE); u8 sta_id; @@ -1470,18 +1472,25 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif)) return -EINVAL; - if (!have_key_offset) { - /* - * The D3 firmware hardcodes the PTK offset to 0, so we have to - * configure it there. As a result, this workaround exists to - * let the caller set the key offset (hw_key_idx), see d3.c. - */ - keyconf->hw_key_idx = iwl_mvm_set_fw_key_idx(mvm); - if (keyconf->hw_key_idx == STA_KEY_IDX_INVALID) + /* If the key_offset is not pre-assigned, we need to find a + * new offset to use. In normal cases, the offset is not + * pre-assigned, but during HW_RESTART we want to reuse the + * same indices, so we pass them when this function is called. + * + * In D3 entry, we need to hardcoded the indices (because the + * firmware hardcodes the PTK offset to 0). In this case, we + * need to make sure we don't overwrite the hw_key_idx in the + * keyconf structure, because otherwise we cannot configure + * the original ones back when resuming. + */ + if (key_offset == STA_KEY_IDX_INVALID) { + key_offset = iwl_mvm_set_fw_key_idx(mvm); + if (key_offset == STA_KEY_IDX_INVALID) return -ENOSPC; + keyconf->hw_key_idx = key_offset; } - ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, mcast); + ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, key_offset, mcast); if (ret) { __clear_bit(keyconf->hw_key_idx, mvm->fw_key_table); goto end; @@ -1495,7 +1504,8 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, */ if (keyconf->cipher == WLAN_CIPHER_SUITE_WEP40 || keyconf->cipher == WLAN_CIPHER_SUITE_WEP104) { - ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, !mcast); + ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, + key_offset, !mcast); if (ret) { __clear_bit(keyconf->hw_key_idx, mvm->fw_key_table); __iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast); @@ -1602,7 +1612,7 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm, mvm_sta = iwl_mvm_sta_from_mac80211(sta); iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - iv32, phase1key, CMD_ASYNC); + iv32, phase1key, CMD_ASYNC, keyconf->hw_key_idx); rcu_read_unlock(); } diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.h b/drivers/net/wireless/iwlwifi/mvm/sta.h index eedb215eba3f..0631cc0a6d3c 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/iwlwifi/mvm/sta.h @@ -365,8 +365,8 @@ int iwl_mvm_rm_sta_id(struct iwl_mvm *mvm, int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, - bool have_key_offset); + struct ieee80211_key_conf *keyconf, + u8 key_offset); int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, From 9513c5e18a0dc55a1fc9c890715098ba2315830b Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Mon, 19 Oct 2015 16:29:11 +0200 Subject: [PATCH 012/110] iwlwifi: mvm: Avoid dereferencing sta if it was already flushed Be a little bit more careful when dereferencing sta on key removal, As it might already get flushed on other thread. Signed-off-by: Avri Altman Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/sta.c | 44 ++++++++++++-------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 4e260083b6a0..354acbde088e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -1201,7 +1201,8 @@ static int iwl_mvm_set_fw_key_idx(struct iwl_mvm *mvm) return max_offs; } -static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif, +static u8 iwl_mvm_get_key_sta_id(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); @@ -1218,8 +1219,21 @@ static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif, * station ID, then use AP's station ID. */ if (vif->type == NL80211_IFTYPE_STATION && - mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) - return mvmvif->ap_sta_id; + mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) { + u8 sta_id = mvmvif->ap_sta_id; + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + /* + * It is possible that the 'sta' parameter is NULL, + * for example when a GTK is removed - the sta_id will then + * be the AP ID, and no station was passed by mac80211. + */ + if (IS_ERR_OR_NULL(sta)) + return IWL_MVM_STATION_COUNT; + + return sta_id; + } return IWL_MVM_STATION_COUNT; } @@ -1445,7 +1459,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); /* Get the station id from the mvm local station table */ - sta_id = iwl_mvm_get_key_sta_id(vif, sta); + sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); if (sta_id == IWL_MVM_STATION_COUNT) { IWL_ERR(mvm, "Failed to find station id\n"); return -EINVAL; @@ -1531,7 +1545,7 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); /* Get the station id from the mvm local station table */ - sta_id = iwl_mvm_get_key_sta_id(vif, sta); + sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); @@ -1557,24 +1571,6 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, return 0; } - /* - * It is possible that the 'sta' parameter is NULL, and thus - * there is a need to retrieve the sta from the local station table, - * for example when a GTK is removed (where the sta_id will then be - * the AP ID, and no station was passed by mac80211.) - */ - if (!sta) { - sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], - lockdep_is_held(&mvm->mutex)); - if (!sta) { - IWL_ERR(mvm, "Invalid station id\n"); - return -EINVAL; - } - } - - if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif)) - return -EINVAL; - ret = __iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast); if (ret) return ret; @@ -1594,7 +1590,7 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm, u16 *phase1key) { struct iwl_mvm_sta *mvm_sta; - u8 sta_id = iwl_mvm_get_key_sta_id(vif, sta); + u8 sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE); if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT)) From 6419fdbb6f90e147690f8833cba59d289d613da5 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 11 Nov 2015 17:31:26 +0530 Subject: [PATCH 013/110] ath10k: poll HTT send completion when CE 5 is unused commit a70587b3389a ("ath10k: configure copy engine 5 for HTT messages") moved send completion polling under HTT Rx (CE 5) service routine. For QCA6174 based devices copy engine 1 (CE 1) is used for HTT Rx instead of CE 5. So send completion never be called. This is causing "failed to transmit packet, dropping: -105" errors. Fix this by processing send completion from CE 1 service routine instead of CE 5. Fixes: a70587b3389a ("ath10k: configure copy engine 5 for HTT messages") Tested-by: Ryan Hsu Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 0ad3dd139bf5..930785a724e1 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -107,6 +107,7 @@ static void ath10k_pci_htc_tx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state); +static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state); static struct ce_attr host_ce_config_wlan[] = { /* CE0: host->target HTC control and raw streams */ @@ -124,7 +125,7 @@ static struct ce_attr host_ce_config_wlan[] = { .src_nentries = 0, .src_sz_max = 2048, .dest_nentries = 512, - .recv_cb = ath10k_pci_htc_rx_cb, + .recv_cb = ath10k_pci_htt_htc_rx_cb, }, /* CE2: target->host WMI */ @@ -1204,6 +1205,16 @@ static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state) ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler); } +static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state) +{ + /* CE4 polling needs to be done whenever CE pipe which transports + * HTT Rx (target->host) is processed. + */ + ath10k_ce_per_engine_service(ce_state->ar, 4); + + ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler); +} + /* Called by lower (CE) layer when a send to HTT Target completes. */ static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state) { From eeec5d0ef7ee54a75e09e861c3cc44177b8752c7 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 10 Nov 2015 10:46:11 -0600 Subject: [PATCH 014/110] rtlwifi: rtl8821ae: Fix lockups on boot In commit 54328e64047a5 ("rtlwifi: rtl8821ae: Fix system lockups on boot"), an attempt was made to fix a regression introduced in commit 1277fa2ab2f9 ("rtlwifi: Remove the clear interrupt routine from all drivers"). Unfortunately, there were logic errors in that patch that prevented affected boxes from booting even after that patch was applied. The actual cause of the original problem is unknown as none of the developers have systems that are affected. Fixes: 54328e64047a ("rtlwifi: rtl8821ae: Fix system lockups on boot") Signed-off-by: Larry Finger Cc: Stable [V4.1+] Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 6e9418ed90c2..bbb789f8990b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -2272,7 +2272,7 @@ void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - if (!rtlpci->int_clear) + if (rtlpci->int_clear) rtl8821ae_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 8ee141a55bc5..142bdff4ed60 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -448,7 +448,7 @@ MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); -MODULE_PARM_DESC(int_clear, "Set to 1 to disable interrupt clear before set (default 0)\n"); +MODULE_PARM_DESC(int_clear, "Set to 0 to disable interrupt clear before set (default 1)\n"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); From 45bb780a2147b9995f3d288c44ecb87ca8a330e2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 Nov 2015 13:58:46 +0100 Subject: [PATCH 015/110] mac80211: don't advertise NL80211_FEATURE_FULL_AP_CLIENT_STATE For now, this feature doesn't actually work. To avoid shipping a kernel that has it enabled but where it can't be used disable it for now - we can re-enable it when it's fixed. This partially reverts 44674d9c2267 ("mac80211: advertise support for full station state in AP mode"). Cc: Ayala Beker Signed-off-by: Johannes Berg --- net/mac80211/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 858f6b1cb149..175ffcf7fb06 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,8 +541,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_VIF_TXPOWER | NL80211_FEATURE_MAC_ON_CREATE | - NL80211_FEATURE_USERSPACE_MPM | - NL80211_FEATURE_FULL_AP_CLIENT_STATE; + NL80211_FEATURE_USERSPACE_MPM; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | From c2e703a55245bfff3db53b1f7cbe59f1ee8a4339 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Nov 2015 14:25:21 +0100 Subject: [PATCH 016/110] mac80211: mesh: fix call_rcu() usage When using call_rcu(), the called function may be delayed quite significantly, and without a matching rcu_barrier() there's no way to be sure it has finished. Therefore, global state that could be gone/freed/reused should never be touched in the callback. Fix this in mesh by moving the atomic_dec() into the caller; that's not really a problem since we already unlinked the path and it will be destroyed anyway. This fixes a crash Jouni observed when running certain tests in a certain order, in which the mesh interface was torn down, the memory reused for a function pointer (work struct) and running that then crashed since the pointer had been decremented by 1, resulting in an invalid instruction byte stream. Cc: stable@vger.kernel.org Fixes: eb2b9311fd00 ("mac80211: mesh path table implementation") Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index b890e225a8f1..b3b44a5dd375 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -779,10 +779,8 @@ void mesh_plink_broken(struct sta_info *sta) static void mesh_path_node_reclaim(struct rcu_head *rp) { struct mpath_node *node = container_of(rp, struct mpath_node, rcu); - struct ieee80211_sub_if_data *sdata = node->mpath->sdata; del_timer_sync(&node->mpath->timer); - atomic_dec(&sdata->u.mesh.mpaths); kfree(node->mpath); kfree(node); } @@ -790,8 +788,9 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) /* needs to be called with the corresponding hashwlock taken */ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) { - struct mesh_path *mpath; - mpath = node->mpath; + struct mesh_path *mpath = node->mpath; + struct ieee80211_sub_if_data *sdata = node->mpath->sdata; + spin_lock(&mpath->state_lock); mpath->flags |= MESH_PATH_RESOLVING; if (mpath->is_gate) @@ -799,6 +798,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) hlist_del_rcu(&node->list); call_rcu(&node->rcu, mesh_path_node_reclaim); spin_unlock(&mpath->state_lock); + atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); } From 945fae44d316a572916f673b2a58cd2d5389e7b6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 17 Nov 2015 13:46:48 -0800 Subject: [PATCH 017/110] udp: remove duplicate include Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/udp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 24ec14f9825c..0c7b0e61b917 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -100,7 +100,6 @@ #include #include #include -#include #include #include #include From 6f97532ef05e49f1998a09f8359b83d00a7b3229 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 17 Nov 2015 14:24:27 -0800 Subject: [PATCH 018/110] fm10k: fix memory leak This was detected by Coverity. The function skb_cow_head leaves skb alone on failure, so caller needs to free. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 639263d5e833..7781e80896a6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -627,8 +627,10 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* verify the skb head is not shared */ err = skb_cow_head(skb, 0); - if (err) + if (err) { + dev_kfree_skb(skb); return NETDEV_TX_OK; + } /* locate vlan header */ vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); From 52d178516dcf61e7a59363a572458c830af6e520 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 17 Nov 2015 14:26:06 -0800 Subject: [PATCH 019/110] hdlc: fix null-deref on allocation failure If alloc_netdev() failed and return NULL, then the next instruction would dereference it. Found by Coverity. Compile tested only. Not sure if anyone still uses this driver (or the whole WAN subsystem). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_fr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index e92aaf615901..89541cc90e87 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1075,11 +1075,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) used = pvc_is_used(pvc); - if (type == ARPHRD_ETHER) { + if (type == ARPHRD_ETHER) dev = alloc_netdev(0, "pvceth%d", NET_NAME_UNKNOWN, ether_setup); - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - } else + else dev = alloc_netdev(0, "pvc%d", NET_NAME_UNKNOWN, pvc_setup); if (!dev) { @@ -1088,9 +1087,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) return -ENOBUFS; } - if (type == ARPHRD_ETHER) + if (type == ARPHRD_ETHER) { + dev->priv_flags &= ~IFF_TX_SKB_SHARING; eth_hw_addr_random(dev); - else { + } else { *(__be16*)dev->dev_addr = htons(dlci); dlci_to_q922(dev->broadcast, dlci); } From 0208e951d55c435137543e12d7ee795c3784713a Mon Sep 17 00:00:00 2001 From: Ben Pope Date: Tue, 17 Nov 2015 18:21:07 -0700 Subject: [PATCH 020/110] ethernet/atheros/alx: add Killer E2400 device ID This patch adds the PCI device ID (0xe0a1) and alx_pci_tbl entry for the Killer E2400 Ethernet controller, modeled after the Killer E2200 controller support (0xe091) already present in the alx driver. This patch was originally authored by Ben Pope, but it got held up by issues in the commit message, so I'm resubmitting it on his behalf. I've extensively used a kernel with this patch on a System76 serw9 laptop and am quite confident it works well (at least on the hardware I have available for testing). Note that as a favor to System76, Ubuntu has been carrying this as a sauce patch in their 4.2 based Wily kernel, which presumably has given it real-world testing on other E2400 equipped hardware (I don't know of any Ubuntu kernel bugs filed about it): https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1498633 Signed-off-by: Jason Gerard DeRose Signed-off-by: Ben Pope Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 2 ++ drivers/net/ethernet/atheros/alx/reg.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c8af3ce3ea38..bd377a6b067d 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1534,6 +1534,8 @@ static const struct pci_device_id alx_pci_tbl[] = { .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2200), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, + { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400), + .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) }, diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h index af006b44b2a6..0959e6824cb6 100644 --- a/drivers/net/ethernet/atheros/alx/reg.h +++ b/drivers/net/ethernet/atheros/alx/reg.h @@ -37,6 +37,7 @@ #define ALX_DEV_ID_AR8161 0x1091 #define ALX_DEV_ID_E2200 0xe091 +#define ALX_DEV_ID_E2400 0xe0a1 #define ALX_DEV_ID_AR8162 0x1090 #define ALX_DEV_ID_AR8171 0x10A1 #define ALX_DEV_ID_AR8172 0x10A0 From 022be25c2498e1baa82562aba9f3380b1ef70fa6 Mon Sep 17 00:00:00 2001 From: Punnaiah Choudary Kalluri Date: Wed, 18 Nov 2015 09:03:50 +0530 Subject: [PATCH 021/110] net: macb: Add support for sgmii phy interface This patch adds support for the sgmii phy interface. Signed-off-by: Punnaiah Choudary Kalluri Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 4 ++++ drivers/net/ethernet/cadence/macb.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 88c1e1a834f8..169059c92f80 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1682,6 +1682,8 @@ static void macb_init_hw(struct macb *bp) macb_set_hwaddr(bp); config = macb_mdc_clk_div(bp); + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) + config |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ config |= MACB_BIT(PAE); /* PAuse Enable */ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ @@ -2416,6 +2418,8 @@ static int macb_init(struct platform_device *pdev) /* Set MII management clock divider */ val = macb_mdc_clk_div(bp); val |= macb_dbw(bp); + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) + val |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); macb_writel(bp, NCFGR, val); return 0; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 6e1faea00ca8..d83b0db77821 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -215,12 +215,17 @@ /* GEM specific NCFGR bitfields. */ #define GEM_GBE_OFFSET 10 /* Gigabit mode enable */ #define GEM_GBE_SIZE 1 +#define GEM_PCSSEL_OFFSET 11 +#define GEM_PCSSEL_SIZE 1 #define GEM_CLK_OFFSET 18 /* MDC clock division */ #define GEM_CLK_SIZE 3 #define GEM_DBW_OFFSET 21 /* Data bus width */ #define GEM_DBW_SIZE 2 #define GEM_RXCOEN_OFFSET 24 #define GEM_RXCOEN_SIZE 1 +#define GEM_SGMIIEN_OFFSET 27 +#define GEM_SGMIIEN_SIZE 1 + /* Constants for data bus width. */ #define GEM_DBW32 0 /* 32 bit AMBA AHB data bus width */ From 206b49500df558dbc15d8836b09f6397ec5ed8bb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 18 Nov 2015 16:40:19 +0100 Subject: [PATCH 022/110] net/ip6_tunnel: fix dst leak the commit cdf3464e6c6b ("ipv6: Fix dst_entry refcnt bugs in ip6_tunnel") introduced percpu storage for ip6_tunnel dst cache, but while clearing such cache it used raw_cpu_ptr to walk the per cpu entries, so cached dst on non current cpu are not actually reset. This patch replaces raw_cpu_ptr with per_cpu_ptr, properly cleaning such storage. Fixes: cdf3464e6c6b ("ipv6: Fix dst_entry refcnt bugs in ip6_tunnel") Signed-off-by: Paolo Abeni Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index eabffbb89795..137fca42aaa6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t) int i; for_each_possible_cpu(i) - ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); + ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); From 451c2b5caf37b526ae34a1081b71115e1de2d063 Mon Sep 17 00:00:00 2001 From: Aya Mahfouz Date: Wed, 18 Nov 2015 08:36:44 +0200 Subject: [PATCH 023/110] net: dns_resolver: convert time_t to time64_t Changes the definition of the pointer _expiry from time_t to time64_t. This is to handle the Y2038 problem where time_t will overflow in the year 2038. The change is safe because the kernel subsystems that call dns_query pass NULL. Signed-off-by: Arnd Bergmann Signed-off-by: Aya Mahfouz Signed-off-by: David S. Miller --- include/linux/dns_resolver.h | 2 +- net/dns_resolver/dns_query.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index cc92268af89a..6ac3cad9aef1 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ extern int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry); + const char *options, char **_result, time64_t *_expiry); #endif /* KERNEL */ diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 4677b6fa6dda..ecc28cff08ab 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -67,7 +67,7 @@ * Returns the size of the result on success, -ve error code otherwise. */ int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry) + const char *options, char **_result, time64_t *_expiry) { struct key *rkey; const struct user_key_payload *upayload; From 508dc0648ca8a2d305faee472cea5e0be579014f Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 19 Nov 2015 01:39:51 +0300 Subject: [PATCH 024/110] ravb: fix WARNING in __free_irq() When the R8A7795 support was added to the driver, little attention was paid to the ravb_open() error path: free_irq() for the EMAC interrupt was called uncoditionally, unlike request_irq(), and in a wrong order as well... As a result, on the R-Car gen2 SoCs I started getting the following in case of a device opening error: WARNING: CPU: 0 PID: 1 at kernel/irq/manage.c:1448 __free_irq+0x8c/0x228() Trying to free already-free IRQ 0 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.4.0-rc1-dirty #1005 Hardware name: Generic R8A7791 (Flattened Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r6:c063cdd6 r5:00000009 r4:00000000 r3:00204140 [] (show_stack) from [] (dump_stack+0x74/0x90) [] (dump_stack) from [] (warn_slowpath_common+0x8c/0xb8) r4:ef04fd38 r3:c0714770 [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x38/0x40) r8:ee8ad800 r7:ef0030a0 r6:00000000 r5:00000000 r4:ef003040 [] (warn_slowpath_fmt) from [] (__free_irq+0x8c/0x228) r3:00000000 r2:c063ce9f [] (__free_irq) from [] (free_irq+0x70/0xa4) r10:0000016b r8:00000000 r7:00000000 r6:ee8ad800 r5:00000000 r4:ef003040 [] (free_irq) from [] (ravb_open+0x224/0x274) r7:fffffffe r6:00000000 r5:fffffffe r4:ee8ad800 [] (ravb_open) from [] (__dev_open+0x84/0x104) r7:ee8ad830 r6:c0566334 r5:00000000 r4:ee8ad800 [] (__dev_open) from [] (__dev_change_flags+0x94/0x13c) r7:00001002 r6:00000001 r5:00001003 r4:ee8ad800 [] (__dev_change_flags) from [] (dev_change_flags+0x20/0x50) r7:c072e6e0 r6:00000138 r5:00001002 r4:ee8ad800 [] (dev_change_flags) from [] (ip_auto_config+0x174/0xfb8) r8:00001002 r7:c072e6e0 r6:c0703344 r5:00000001 r4:ee8ad800 r3:00000101 [] (ip_auto_config) from [] (do_one_initcall+0x100/0x1cc) r10:c06fb83c r9:00000000 r8:c06ebef8 r7:c0736000 r6:c0710918 r5:c0710918 r4:ef2f8f80 [] (do_one_initcall) from [] (kernel_init_freeable+0x11c/0x1 ec) r10:c06fb83c r9:00000000 r8:0000009a r7:c0736000 r6:c0706bf0 r5:c06fb834 r4:00000007 [] (kernel_init_freeable) from [] (kernel_init+0x14/0xec) r10:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0514c40 r4:c0736000 [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) r4:00000000 r3:ef04e000 Fix up the free_irq() call order and add a new label on the error path. Fixes: 22d4df8ff3a3 ("ravb: Add support for r8a7795 SoC") Signed-off-by: Sergei Shtylyov Acked-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ee8d1ec61fab..ed5da4d47668 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1225,7 +1225,7 @@ static int ravb_open(struct net_device *ndev) /* Device init */ error = ravb_dmac_init(ndev); if (error) - goto out_free_irq; + goto out_free_irq2; ravb_emac_init(ndev); /* Initialise PTP Clock driver */ @@ -1243,9 +1243,11 @@ static int ravb_open(struct net_device *ndev) out_ptp_stop: /* Stop PTP Clock driver */ ravb_ptp_stop(ndev); +out_free_irq2: + if (priv->chip_id == RCAR_GEN3) + free_irq(priv->emac_irq, ndev); out_free_irq: free_irq(ndev->irq, ndev); - free_irq(priv->emac_irq, ndev); out_napi_off: napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); From f4b16fce7a5a1ec8069b1f577476bdc1d2688cd1 Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Wed, 18 Nov 2015 00:56:02 -0800 Subject: [PATCH 025/110] arm64: bpf: fix buffer pointer During code review, I noticed we were passing a bad buffer pointer to bpf_load_pointer helper function called by jitted code. Point to the buffer allocated by JIT, so we don't silently corrupt other parts of the stack. Signed-off-by: Zi Shen Lim Acked-by: Yang Shi Signed-off-by: David S. Miller --- arch/arm64/net/bpf_jit_comp.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index d6a53ef2350b..7cf032bebf8c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -139,6 +139,12 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) +#define _STACK_SIZE \ + (MAX_BPF_STACK \ + + 4 /* extra for skb_copy_bits buffer */) + +#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) + static void build_prologue(struct jit_ctx *ctx) { const u8 r6 = bpf2a64[BPF_REG_6]; @@ -150,10 +156,6 @@ static void build_prologue(struct jit_ctx *ctx) const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - int stack_size = MAX_BPF_STACK; - - stack_size += 4; /* extra for skb_copy_bits buffer */ - stack_size = STACK_ALIGN(stack_size); /* * BPF prog stack layout @@ -165,12 +167,13 @@ static void build_prologue(struct jit_ctx *ctx) * | ... | callee saved registers * +-----+ * | | x25/x26 - * BPF fp register => -80:+-----+ + * BPF fp register => -80:+-----+ <= (BPF_FP) * | | * | ... | BPF prog stack * | | - * | | - * current A64_SP => +-----+ + * +-----+ <= (BPF_FP - MAX_BPF_STACK) + * |RSVD | JIT scratchpad + * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) * | | * | ... | Function call stack * | | @@ -196,7 +199,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(A64_MOV(1, fp, A64_SP), ctx); /* Set up function call stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); /* Clear registers A and X */ emit_a64_mov_i64(ra, 0, ctx); @@ -213,13 +216,9 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - int stack_size = MAX_BPF_STACK; - - stack_size += 4; /* extra for skb_copy_bits buffer */ - stack_size = STACK_ALIGN(stack_size); /* We're done with BPF stack */ - emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx); + emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); /* Restore fs (x25) and x26 */ emit(A64_POP(fp, A64_R(26), A64_SP), ctx); @@ -658,7 +657,7 @@ emit_cond_jmp: return -EINVAL; } emit_a64_mov_i64(r3, size, ctx); - emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx); + emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx); emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); From 68242a5a1e2edce39b069385cbafb82304eac0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Nov 2015 21:13:07 +0100 Subject: [PATCH 026/110] net: qmi_wwan: add XS Stick W100-2 from 4G Systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thomas reports " 4gsystems sells two total different LTE-surfsticks under the same name. .. The newer version of XS Stick W100 is from "omega" .. Under windows the driver switches to the same ID, and uses MI03\6 for network and MI01\6 for modem. .. echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b01 Rev=02.32 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Now all important things are there: wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at) There is also ttyUSB0, but it is not usable, at least not for at. The device works well with qmi and ModemManager-NetworkManager. " Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 34799eaace41..9a5be8b85186 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -725,6 +725,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ From 1b8e6a01e19f001e9f93b39c32387961c91ed3cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 12:40:13 -0800 Subject: [PATCH 027/110] tcp: md5: fix lockdep annotation When a passive TCP is created, we eventually call tcp_md5_do_add() with sk pointing to the child. It is not owner by the user yet (we will add this socket into listener accept queue a bit later anyway) But we do own the spinlock, so amend the lockdep annotation to avoid following splat : [ 8451.090932] net/ipv4/tcp_ipv4.c:923 suspicious rcu_dereference_protected() usage! [ 8451.090932] [ 8451.090932] other info that might help us debug this: [ 8451.090932] [ 8451.090934] [ 8451.090934] rcu_scheduler_active = 1, debug_locks = 1 [ 8451.090936] 3 locks held by socket_sockopt_/214795: [ 8451.090936] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x151/0xe90 [ 8451.090947] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x43/0x2b0 [ 8451.090952] #2: (slock-AF_INET){+.-...}, at: [] sk_clone_lock+0x1c5/0x500 [ 8451.090958] [ 8451.090958] stack backtrace: [ 8451.090960] CPU: 7 PID: 214795 Comm: socket_sockopt_ [ 8451.091215] Call Trace: [ 8451.091216] [] dump_stack+0x55/0x76 [ 8451.091229] [] lockdep_rcu_suspicious+0xeb/0x110 [ 8451.091235] [] tcp_md5_do_add+0x1bf/0x1e0 [ 8451.091239] [] tcp_v4_syn_recv_sock+0x1f1/0x4c0 [ 8451.091242] [] ? tcp_v4_md5_hash_skb+0x167/0x190 [ 8451.091246] [] tcp_check_req+0x3c8/0x500 [ 8451.091249] [] ? tcp_v4_inbound_md5_hash+0x11e/0x190 [ 8451.091253] [] tcp_v4_rcv+0x3c0/0x9f0 [ 8451.091256] [] ? ip_local_deliver_finish+0x43/0x2b0 [ 8451.091260] [] ip_local_deliver_finish+0xb6/0x2b0 [ 8451.091263] [] ? ip_local_deliver_finish+0x43/0x2b0 [ 8451.091267] [] ip_local_deliver+0x48/0x80 [ 8451.091270] [] ip_rcv_finish+0x160/0x700 [ 8451.091273] [] ip_rcv+0x29e/0x3d0 [ 8451.091277] [] __netif_receive_skb_core+0xb47/0xe90 Fixes: a8afca0329988 ("tcp: md5: protects md5sig_info with RCU") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ba09016d1bfd..db003438aaf5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -921,7 +921,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) From 5ad11b50fda1306b5317124f97f0a7a4c022b022 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Nov 2015 10:24:39 +0200 Subject: [PATCH 028/110] mac80211: ensure we don't update tx power on a non-running sdata We can't update the Tx power on the device unless it is running. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=101521. Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d0dc1bfaeec2..53ee049efbff 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -76,7 +76,8 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, bool update_bss) { - if (__ieee80211_recalc_txpower(sdata) || update_bss) + if (__ieee80211_recalc_txpower(sdata) || + (update_bss && ieee80211_sdata_running(sdata))) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } From ac0621971a26526cad8cf9db7626d5e50562a441 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 17 Nov 2015 10:24:38 +0200 Subject: [PATCH 029/110] mac80211: always set the buf_size in AddBA req to 64 Advertising reordering window in ADDBA less than 64 can crash some APs, an example is LinkSys WRT120N (with FW v1.0.07 build 002 Jun 18 2012). On the other hand, a driver may need to limit Tx A-MPDU size for its own reasons, like specific HW limitations. Signed-off-by: Gregory Greenman Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++-- net/mac80211/agg-tx.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 82045fca388b..760bc4d5a2cf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2003,8 +2003,10 @@ enum ieee80211_hw_flags { * it shouldn't be set. * * @max_tx_aggregation_subframes: maximum number of subframes in an - * aggregate an HT driver will transmit, used by the peer as a - * hint to size its reorder buffer. + * aggregate an HT driver will transmit. Though ADDBA will advertise + * a constant value of 64 as some older APs can crash if the window + * size is smaller (an example is LinkSys WRT120N with FW v1.0.07 + * build 002 Jun 18 2012). * * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX * (if %IEEE80211_HW_QUEUE_CONTROL is set) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index a758eb84e8f0..ff757181b0a8 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -500,7 +500,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - local->hw.max_tx_aggregation_subframes, + IEEE80211_MAX_AMPDU_BUF, tid_tx->timeout); } @@ -926,6 +926,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); mutex_lock(&sta->ampdu_mlme.mtx); From 0db19b850468a24b70d8471f5ebe71f0a035bbab Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 18 Nov 2015 17:27:25 -0800 Subject: [PATCH 030/110] net: cpsw: Fix ethernet regression for dm814x Commit b6745f6e4e63 ("drivers: net: cpsw: davinci_emac: move reading mac id to common file") started using of_machine_is_compatible for detecting type but missed at dm8148 causing Ethernet to stop working. Let's fix the issue by adding handling for dm814x. Cc: Mugunthan V N Signed-off-by: Tony Lindgren Acked-by: Mugunthnan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c index c08be62bceba..1562ab4151e1 100644 --- a/drivers/net/ethernet/ti/cpsw-common.c +++ b/drivers/net/ethernet/ti/cpsw-common.c @@ -78,6 +78,9 @@ static int cpsw_am33xx_cm_get_macid(struct device *dev, u16 offset, int slave, int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr) { + if (of_machine_is_compatible("ti,dm8148")) + return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); + if (of_machine_is_compatible("ti,am33xx")) return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); From 0e45f4da5981895e885dd72fe912a3f8e32bae73 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Nov 2015 18:17:30 -0800 Subject: [PATCH 031/110] tcp: disable Fast Open on timeouts after handshake Some middle-boxes black-hole the data after the Fast Open handshake (https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf). The exact reason is unknown. The work-around is to disable Fast Open temporarily after multiple recurring timeouts with few or no data delivered in the established state. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Reported-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c9c716a483e4..448603a81966 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk) syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + /* Some middle-boxes may black-hole Fast Open _after_ + * the handshake. Therefore we conservatively disable + * Fast Open on this path on recurring timeouts with + * few or zero bytes acked after Fast Open. + */ + if (tp->syn_data_acked && + tp->bytes_acked <= tp->rx_opt.mss_clamp) { + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); + if (icsk->icsk_retransmits == sysctl_tcp_retries1) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } /* Black hole detection */ tcp_mtu_probing(icsk, sk); From dd52bc2b4ed16db66f9347aa263d8f1dc889b4b6 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Nov 2015 18:17:31 -0800 Subject: [PATCH 032/110] tcp: fix Fast Open snmp over-counting bug Fix incrementing TCPFastOpenActiveFailed snmp stats multiple times when the handshake experiences multiple SYN timeouts. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 448603a81966..193ba1fa8a9a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -168,7 +168,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); if (tp->syn_fastopen || tp->syn_data) tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (tp->syn_data) + if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } From 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 21:03:33 -0800 Subject: [PATCH 033/110] tcp: fix potential huge kmalloc() calls in TCP_REPAIR tcp_send_rcvq() is used for re-injecting data into tcp receive queue. Problems : - No check against size is performed, allowed user to fool kernel in attempting very large memory allocations, eventually triggering OOM when memory is fragmented. - In case of fault during the copy we do not return correct errno. Lets use alloc_skb_with_frags() to cook optimal skbs. Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c") Fixes: c0e88ff0f256 ("tcp: Repair socket queues") Signed-off-by: Eric Dumazet Cc: Pavel Emelyanov Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd88c3803a6..a4a0b6b3bcf2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4481,19 +4481,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_buff *skb; + int err = -ENOMEM; + int data_len = 0; bool fragstolen; if (size == 0) return 0; - skb = alloc_skb(size, sk->sk_allocation); + if (size > PAGE_SIZE) { + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); + + data_len = npages << PAGE_SHIFT; + size = data_len + (size & ~PAGE_MASK); + } + skb = alloc_skb_with_frags(size - data_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, + &err, sk->sk_allocation); if (!skb) goto err; + skb_put(skb, size - data_len); + skb->data_len = data_len; + skb->len = size; + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (err) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; @@ -4509,7 +4524,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) err_free: kfree_skb(skb); err: - return -ENOMEM; + return err; + } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) From 425d3d83707d378d97d1f7d460ce3083a1948c20 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 19 Nov 2015 11:56:51 +0200 Subject: [PATCH 034/110] bnx2x: Fix vxlan removal Commmit ac7eccd4d48fc "bnx2x: track vxlan port count" contains a bug - Instead of achieving the required goal, vxlan configuration would not be removed since we're decrementing the port instead of the counter. CC: Jiri Benc Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c9b036789184..2e611dc5f162 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10139,8 +10139,8 @@ static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port) DP(BNX2X_MSG_SP, "Invalid vxlan port\n"); return; } - bp->vxlan_dst_port--; - if (bp->vxlan_dst_port) + bp->vxlan_dst_port_count--; + if (bp->vxlan_dst_port_count) return; if (netif_running(bp->dev)) { From de92718883ddbcd11b738d36ffcf57617b97fa12 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Nov 2015 11:42:26 +0100 Subject: [PATCH 035/110] net: tulip: turn compile-time warning into dev_warn() The tulip driver causes annoying build-time warnings for allmodconfig builds for all recent architectures: dec/tulip/winbond-840.c:910:2: warning: #warning Processor architecture undefined dec/tulip/tulip_core.c:101:2: warning: #warning Processor architecture undefined! This is the last remaining warning for arm64, and I'd like to get rid of it. We don't really know the cache line size, architecturally it would be at least 16 bytes, but all implementations I found have 64 or 128 bytes. Configuring tulip for 32-byte lines as we do on ARM32 seems to be the safe but slow default, and nobody who cares about performance these days would use a tulip chip anyway, so we can just use that. To save the next person the job of trying to find out what this is for and picking a default for their architecture just to kill off the warning, I'm now removing the preprocessor #warning and turning it into a pr_warn or dev_warn that prints the equivalent information when the driver gets loaded. Signed-off-by: Arnd Bergmann Acked-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/tulip_core.c | 9 +++++++-- drivers/net/ethernet/dec/tulip/winbond-840.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index ed41559bae77..b553409e04ad 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -98,8 +98,7 @@ static int csr0 = 0x01A00000 | 0x4800; #elif defined(__mips__) static int csr0 = 0x00200000 | 0x4000; #else -#warning Processor architecture undefined! -static int csr0 = 0x00A00000 | 0x4800; +static int csr0; #endif /* Operational parameters that usually are not changed. */ @@ -1982,6 +1981,12 @@ static int __init tulip_init (void) pr_info("%s", version); #endif + if (!csr0) { + pr_warn("tulip: unknown CPU architecture, using default csr0\n"); + /* default to 8 longword cache line alignment */ + csr0 = 0x00A00000 | 0x4800; + } + /* copy module parms into globals */ tulip_rx_copybreak = rx_copybreak; tulip_max_interrupt_work = max_interrupt_work; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 9beb3d34d4ba..3c0e4d5c5fef 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -907,7 +907,7 @@ static void init_registers(struct net_device *dev) #elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) || defined(CONFIG_ARM) i |= 0x4800; #else -#warning Processor architecture undefined + dev_warn(&dev->dev, "unknown CPU architecture, using default csr0 setting\n"); i |= 0x4800; #endif iowrite32(i, ioaddr + PCIBusCfg); From 52dfc8301248f5008d64a680e832e2f99c55ec9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= Date: Thu, 19 Nov 2015 13:02:59 +0000 Subject: [PATCH 036/110] net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller This adds a driver for the Aurora VLSI NB8800 Ethernet controller. It is an almost complete rewrite of a driver originally found in a Sigma Designs 2.6.22 tree. Signed-off-by: Mans Rullgard Signed-off-by: David S. Miller --- drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/aurora/Kconfig | 20 + drivers/net/ethernet/aurora/Makefile | 1 + drivers/net/ethernet/aurora/nb8800.c | 1552 ++++++++++++++++++++++++++ drivers/net/ethernet/aurora/nb8800.h | 316 ++++++ 6 files changed, 1891 insertions(+) create mode 100644 drivers/net/ethernet/aurora/Kconfig create mode 100644 drivers/net/ethernet/aurora/Makefile create mode 100644 drivers/net/ethernet/aurora/nb8800.c create mode 100644 drivers/net/ethernet/aurora/nb8800.h diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 955d06b9cdba..31c5e476fd64 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -29,6 +29,7 @@ source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" +source "drivers/net/ethernet/aurora/Kconfig" source "drivers/net/ethernet/cadence/Kconfig" source "drivers/net/ethernet/adi/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 4a2ee98738f0..071f84eb6f3f 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ +obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/ obj-$(CONFIG_NET_CADENCE) += cadence/ obj-$(CONFIG_NET_BFIN) += adi/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig new file mode 100644 index 000000000000..a3c7106fdf85 --- /dev/null +++ b/drivers/net/ethernet/aurora/Kconfig @@ -0,0 +1,20 @@ +config NET_VENDOR_AURORA + bool "Aurora VLSI devices" + help + If you have a network (Ethernet) device belonging to this class, + say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + questions about Aurora devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AURORA + +config AURORA_NB8800 + tristate "Aurora AU-NB8800 support" + select PHYLIB + help + Support for the AU-NB8800 gigabit Ethernet controller. + +endif diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile new file mode 100644 index 000000000000..6cb528a2fc26 --- /dev/null +++ b/drivers/net/ethernet/aurora/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_AURORA_NB8800) += nb8800.o diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c new file mode 100644 index 000000000000..ecc4a334c507 --- /dev/null +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -0,0 +1,1552 @@ +/* + * Copyright (C) 2015 Mans Rullgard + * + * Mostly rewritten, based on driver from Sigma Designs. Original + * copyright notice below. + * + * + * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac. + * + * Copyright (C) 2005 Maxime Bizon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nb8800.h" + +static void nb8800_tx_done(struct net_device *dev); +static int nb8800_dma_stop(struct net_device *dev); + +static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg) +{ + return readb_relaxed(priv->base + reg); +} + +static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg) +{ + return readl_relaxed(priv->base + reg); +} + +static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val) +{ + writeb_relaxed(val, priv->base + reg); +} + +static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val) +{ + writew_relaxed(val, priv->base + reg); +} + +static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val) +{ + writel_relaxed(val, priv->base + reg); +} + +static inline void nb8800_maskb(struct nb8800_priv *priv, int reg, + u32 mask, u32 val) +{ + u32 old = nb8800_readb(priv, reg); + u32 new = (old & ~mask) | (val & mask); + + if (new != old) + nb8800_writeb(priv, reg, new); +} + +static inline void nb8800_maskl(struct nb8800_priv *priv, int reg, + u32 mask, u32 val) +{ + u32 old = nb8800_readl(priv, reg); + u32 new = (old & ~mask) | (val & mask); + + if (new != old) + nb8800_writel(priv, reg, new); +} + +static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits, + bool set) +{ + nb8800_maskb(priv, reg, bits, set ? bits : 0); +} + +static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits) +{ + nb8800_maskb(priv, reg, bits, bits); +} + +static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits) +{ + nb8800_maskb(priv, reg, bits, 0); +} + +static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits, + bool set) +{ + nb8800_maskl(priv, reg, bits, set ? bits : 0); +} + +static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits) +{ + nb8800_maskl(priv, reg, bits, bits); +} + +static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits) +{ + nb8800_maskl(priv, reg, bits, 0); +} + +static int nb8800_mdio_wait(struct mii_bus *bus) +{ + struct nb8800_priv *priv = bus->priv; + u32 val; + + return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD, + val, !(val & MDIO_CMD_GO), 1, 1000); +} + +static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd) +{ + struct nb8800_priv *priv = bus->priv; + int err; + + err = nb8800_mdio_wait(bus); + if (err) + return err; + + nb8800_writel(priv, NB8800_MDIO_CMD, cmd); + udelay(10); + nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO); + + return nb8800_mdio_wait(bus); +} + +static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg) +{ + struct nb8800_priv *priv = bus->priv; + u32 val; + int err; + + err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg)); + if (err) + return err; + + val = nb8800_readl(priv, NB8800_MDIO_STS); + if (val & MDIO_STS_ERR) + return 0xffff; + + return val & 0xffff; +} + +static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) +{ + u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) | + MDIO_CMD_DATA(val) | MDIO_CMD_WR; + + return nb8800_mdio_cmd(bus, cmd); +} + +static void nb8800_mac_tx(struct net_device *dev, bool enable) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN) + cpu_relax(); + + nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable); +} + +static void nb8800_mac_rx(struct net_device *dev, bool enable) +{ + nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable); +} + +static void nb8800_mac_af(struct net_device *dev, bool enable) +{ + nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable); +} + +static void nb8800_start_rx(struct net_device *dev) +{ + nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN); +} + +static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; + struct nb8800_rx_buf *rxb = &priv->rx_bufs[i]; + int size = L1_CACHE_ALIGN(RX_BUF_SIZE); + dma_addr_t dma_addr; + struct page *page; + unsigned long offset; + void *data; + + data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size); + if (!data) + return -ENOMEM; + + page = virt_to_head_page(data); + offset = data - page_address(page); + + dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE, + DMA_FROM_DEVICE); + + if (dma_mapping_error(&dev->dev, dma_addr)) { + skb_free_frag(data); + return -ENOMEM; + } + + rxb->page = page; + rxb->offset = offset; + rxd->desc.s_addr = dma_addr; + + return 0; +} + +static void nb8800_receive(struct net_device *dev, unsigned int i, + unsigned int len) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; + struct page *page = priv->rx_bufs[i].page; + int offset = priv->rx_bufs[i].offset; + void *data = page_address(page) + offset; + dma_addr_t dma = rxd->desc.s_addr; + struct sk_buff *skb; + unsigned int size; + int err; + + size = len <= RX_COPYBREAK ? len : RX_COPYHDR; + + skb = napi_alloc_skb(&priv->napi, size); + if (!skb) { + netdev_err(dev, "rx skb allocation failed\n"); + dev->stats.rx_dropped++; + return; + } + + if (len <= RX_COPYBREAK) { + dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE); + memcpy(skb_put(skb, len), data, len); + dma_sync_single_for_device(&dev->dev, dma, len, + DMA_FROM_DEVICE); + } else { + err = nb8800_alloc_rx(dev, i, true); + if (err) { + netdev_err(dev, "rx buffer allocation failed\n"); + dev->stats.rx_dropped++; + return; + } + + dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE); + memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + offset + RX_COPYHDR, len - RX_COPYHDR, + RX_BUF_SIZE); + } + + skb->protocol = eth_type_trans(skb, dev); + napi_gro_receive(&priv->napi, skb); +} + +static void nb8800_rx_error(struct net_device *dev, u32 report) +{ + if (report & RX_LENGTH_ERR) + dev->stats.rx_length_errors++; + + if (report & RX_FCS_ERR) + dev->stats.rx_crc_errors++; + + if (report & RX_FIFO_OVERRUN) + dev->stats.rx_fifo_errors++; + + if (report & RX_ALIGNMENT_ERROR) + dev->stats.rx_frame_errors++; + + dev->stats.rx_errors++; +} + +static int nb8800_poll(struct napi_struct *napi, int budget) +{ + struct net_device *dev = napi->dev; + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd; + unsigned int last = priv->rx_eoc; + unsigned int next; + int work = 0; + + nb8800_tx_done(dev); + +again: + while (work < budget) { + struct nb8800_rx_buf *rxb; + unsigned int len; + + next = (last + 1) % RX_DESC_COUNT; + + rxb = &priv->rx_bufs[next]; + rxd = &priv->rx_descs[next]; + + if (!rxd->report) + break; + + len = RX_BYTES_TRANSFERRED(rxd->report); + + if (IS_RX_ERROR(rxd->report)) + nb8800_rx_error(dev, rxd->report); + else + nb8800_receive(dev, next, len); + + dev->stats.rx_packets++; + dev->stats.rx_bytes += len; + + if (rxd->report & RX_MULTICAST_PKT) + dev->stats.multicast++; + + rxd->report = 0; + last = next; + work++; + } + + if (work) { + priv->rx_descs[last].desc.config |= DESC_EOC; + wmb(); /* ensure new EOC is written before clearing old */ + priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC; + priv->rx_eoc = last; + nb8800_start_rx(dev); + } + + if (work < budget) { + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); + + /* If a packet arrived after we last checked but + * before writing RX_ITR, the interrupt will be + * delayed, so we retrieve it now. + */ + if (priv->rx_descs[next].report) + goto again; + + napi_complete_done(napi, work); + } + + return work; +} + +static void __nb8800_tx_dma_start(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_buf *txb; + u32 txc_cr; + + txb = &priv->tx_bufs[priv->tx_queue]; + if (!txb->ready) + return; + + txc_cr = nb8800_readl(priv, NB8800_TXC_CR); + if (txc_cr & TCR_EN) + return; + + nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); + wmb(); /* ensure desc addr is written before starting DMA */ + nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN); + + priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT; +} + +static void nb8800_tx_dma_start(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + spin_lock_irq(&priv->tx_lock); + __nb8800_tx_dma_start(dev); + spin_unlock_irq(&priv->tx_lock); +} + +static void nb8800_tx_dma_start_irq(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + spin_lock(&priv->tx_lock); + __nb8800_tx_dma_start(dev); + spin_unlock(&priv->tx_lock); +} + +static int nb8800_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_desc *txd; + struct nb8800_tx_buf *txb; + struct nb8800_dma_desc *desc; + dma_addr_t dma_addr; + unsigned int dma_len; + unsigned int align; + unsigned int next; + + if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) { + netif_stop_queue(dev); + return NETDEV_TX_BUSY; + } + + align = (8 - (uintptr_t)skb->data) & 7; + + dma_len = skb->len - align; + dma_addr = dma_map_single(&dev->dev, skb->data + align, + dma_len, DMA_TO_DEVICE); + + if (dma_mapping_error(&dev->dev, dma_addr)) { + netdev_err(dev, "tx dma mapping error\n"); + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) { + netif_stop_queue(dev); + skb->xmit_more = 0; + } + + next = priv->tx_next; + txb = &priv->tx_bufs[next]; + txd = &priv->tx_descs[next]; + desc = &txd->desc[0]; + + next = (next + 1) % TX_DESC_COUNT; + + if (align) { + memcpy(txd->buf, skb->data, align); + + desc->s_addr = + txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); + desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]); + desc->config = DESC_BTS(2) | DESC_DS | align; + + desc++; + } + + desc->s_addr = dma_addr; + desc->n_addr = priv->tx_bufs[next].dma_desc; + desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len; + + if (!skb->xmit_more) + desc->config |= DESC_EOC; + + txb->skb = skb; + txb->dma_addr = dma_addr; + txb->dma_len = dma_len; + + if (!priv->tx_chain) { + txb->chain_len = 1; + priv->tx_chain = txb; + } else { + priv->tx_chain->chain_len++; + } + + netdev_sent_queue(dev, skb->len); + + priv->tx_next = next; + + if (!skb->xmit_more) { + smp_wmb(); + priv->tx_chain->ready = true; + priv->tx_chain = NULL; + nb8800_tx_dma_start(dev); + } + + return NETDEV_TX_OK; +} + +static void nb8800_tx_error(struct net_device *dev, u32 report) +{ + if (report & TX_LATE_COLLISION) + dev->stats.collisions++; + + if (report & TX_PACKET_DROPPED) + dev->stats.tx_dropped++; + + if (report & TX_FIFO_UNDERRUN) + dev->stats.tx_fifo_errors++; + + dev->stats.tx_errors++; +} + +static void nb8800_tx_done(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int limit = priv->tx_next; + unsigned int done = priv->tx_done; + unsigned int packets = 0; + unsigned int len = 0; + + while (done != limit) { + struct nb8800_tx_desc *txd = &priv->tx_descs[done]; + struct nb8800_tx_buf *txb = &priv->tx_bufs[done]; + struct sk_buff *skb; + + if (!txd->report) + break; + + skb = txb->skb; + len += skb->len; + + dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len, + DMA_TO_DEVICE); + + if (IS_TX_ERROR(txd->report)) { + nb8800_tx_error(dev, txd->report); + kfree_skb(skb); + } else { + consume_skb(skb); + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report); + dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report); + + txb->skb = NULL; + txb->ready = false; + txd->report = 0; + + done = (done + 1) % TX_DESC_COUNT; + packets++; + } + + if (packets) { + smp_mb__before_atomic(); + atomic_add(packets, &priv->tx_free); + netdev_completed_queue(dev, packets, len); + netif_wake_queue(dev); + priv->tx_done = done; + } +} + +static irqreturn_t nb8800_irq(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct nb8800_priv *priv = netdev_priv(dev); + irqreturn_t ret = IRQ_NONE; + u32 val; + + /* tx interrupt */ + val = nb8800_readl(priv, NB8800_TXC_SR); + if (val) { + nb8800_writel(priv, NB8800_TXC_SR, val); + + if (val & TSR_DI) + nb8800_tx_dma_start_irq(dev); + + if (val & TSR_TI) + napi_schedule_irqoff(&priv->napi); + + if (unlikely(val & TSR_DE)) + netdev_err(dev, "TX DMA error\n"); + + /* should never happen with automatic status retrieval */ + if (unlikely(val & TSR_TO)) + netdev_err(dev, "TX Status FIFO overflow\n"); + + ret = IRQ_HANDLED; + } + + /* rx interrupt */ + val = nb8800_readl(priv, NB8800_RXC_SR); + if (val) { + nb8800_writel(priv, NB8800_RXC_SR, val); + + if (likely(val & (RSR_RI | RSR_DI))) { + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll); + napi_schedule_irqoff(&priv->napi); + } + + if (unlikely(val & RSR_DE)) + netdev_err(dev, "RX DMA error\n"); + + /* should never happen with automatic status retrieval */ + if (unlikely(val & RSR_RO)) + netdev_err(dev, "RX Status FIFO overflow\n"); + + ret = IRQ_HANDLED; + } + + return ret; +} + +static void nb8800_mac_config(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + bool gigabit = priv->speed == SPEED_1000; + u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE; + u32 mac_mode = 0; + u32 slot_time; + u32 phy_clk; + u32 ict; + + if (!priv->duplex) + mac_mode |= HALF_DUPLEX; + + if (gigabit) { + if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) + mac_mode |= RGMII_MODE; + + mac_mode |= GMAC_MODE; + phy_clk = 125000000; + + /* Should be 512 but register is only 8 bits */ + slot_time = 255; + } else { + phy_clk = 25000000; + slot_time = 128; + } + + ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk)); + + nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict); + nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time); + nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode); +} + +static void nb8800_pause_config(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + u32 rxcr; + + if (priv->pause_aneg) { + if (!phydev || !phydev->link) + return; + + priv->pause_rx = phydev->pause; + priv->pause_tx = phydev->pause ^ phydev->asym_pause; + } + + nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx); + + rxcr = nb8800_readl(priv, NB8800_RXC_CR); + if (!!(rxcr & RCR_FL) == priv->pause_tx) + return; + + if (netif_running(dev)) { + napi_disable(&priv->napi); + netif_tx_lock_bh(dev); + nb8800_dma_stop(dev); + nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); + nb8800_start_rx(dev); + netif_tx_unlock_bh(dev); + napi_enable(&priv->napi); + } else { + nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); + } +} + +static void nb8800_link_reconfigure(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + int change = 0; + + if (phydev->link) { + if (phydev->speed != priv->speed) { + priv->speed = phydev->speed; + change = 1; + } + + if (phydev->duplex != priv->duplex) { + priv->duplex = phydev->duplex; + change = 1; + } + + if (change) + nb8800_mac_config(dev); + + nb8800_pause_config(dev); + } + + if (phydev->link != priv->link) { + priv->link = phydev->link; + change = 1; + } + + if (change) + phy_print_status(priv->phydev); +} + +static void nb8800_update_mac_addr(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]); + + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]); +} + +static int nb8800_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr *sock = addr; + + if (netif_running(dev)) + return -EBUSY; + + ether_addr_copy(dev->dev_addr, sock->sa_data); + nb8800_update_mac_addr(dev); + + return 0; +} + +static void nb8800_mc_init(struct net_device *dev, int val) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + nb8800_writeb(priv, NB8800_MC_INIT, val); + readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val, + 1, 1000); +} + +static void nb8800_set_rx_mode(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct netdev_hw_addr *ha; + int i; + + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { + nb8800_mac_af(dev, false); + return; + } + + nb8800_mac_af(dev, true); + nb8800_mc_init(dev, 0); + + netdev_for_each_mc_addr(ha, dev) { + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]); + + nb8800_mc_init(dev, 0xff); + } +} + +#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc)) +#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc)) + +static void nb8800_dma_free(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int i; + + if (priv->rx_bufs) { + for (i = 0; i < RX_DESC_COUNT; i++) + if (priv->rx_bufs[i].page) + put_page(priv->rx_bufs[i].page); + + kfree(priv->rx_bufs); + priv->rx_bufs = NULL; + } + + if (priv->tx_bufs) { + for (i = 0; i < TX_DESC_COUNT; i++) + kfree_skb(priv->tx_bufs[i].skb); + + kfree(priv->tx_bufs); + priv->tx_bufs = NULL; + } + + if (priv->rx_descs) { + dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs, + priv->rx_desc_dma); + priv->rx_descs = NULL; + } + + if (priv->tx_descs) { + dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs, + priv->tx_desc_dma); + priv->tx_descs = NULL; + } +} + +static void nb8800_dma_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd; + struct nb8800_tx_desc *txd; + unsigned int i; + + for (i = 0; i < RX_DESC_COUNT; i++) { + dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd); + + rxd = &priv->rx_descs[i]; + rxd->desc.n_addr = rx_dma + sizeof(*rxd); + rxd->desc.r_addr = + rx_dma + offsetof(struct nb8800_rx_desc, report); + rxd->desc.config = priv->rx_dma_config; + rxd->report = 0; + } + + rxd->desc.n_addr = priv->rx_desc_dma; + rxd->desc.config |= DESC_EOC; + + priv->rx_eoc = RX_DESC_COUNT - 1; + + for (i = 0; i < TX_DESC_COUNT; i++) { + struct nb8800_tx_buf *txb = &priv->tx_bufs[i]; + dma_addr_t r_dma = txb->dma_desc + + offsetof(struct nb8800_tx_desc, report); + + txd = &priv->tx_descs[i]; + txd->desc[0].r_addr = r_dma; + txd->desc[1].r_addr = r_dma; + txd->report = 0; + } + + priv->tx_next = 0; + priv->tx_queue = 0; + priv->tx_done = 0; + atomic_set(&priv->tx_free, TX_DESC_COUNT); + + nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma); + + wmb(); /* ensure all setup is written before starting */ +} + +static int nb8800_dma_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int n_rx = RX_DESC_COUNT; + unsigned int n_tx = TX_DESC_COUNT; + unsigned int i; + int err; + + priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE, + &priv->rx_desc_dma, GFP_KERNEL); + if (!priv->rx_descs) + goto err_out; + + priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL); + if (!priv->rx_bufs) + goto err_out; + + for (i = 0; i < n_rx; i++) { + err = nb8800_alloc_rx(dev, i, false); + if (err) + goto err_out; + } + + priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE, + &priv->tx_desc_dma, GFP_KERNEL); + if (!priv->tx_descs) + goto err_out; + + priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL); + if (!priv->tx_bufs) + goto err_out; + + for (i = 0; i < n_tx; i++) + priv->tx_bufs[i].dma_desc = + priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc); + + nb8800_dma_reset(dev); + + return 0; + +err_out: + nb8800_dma_free(dev); + + return -ENOMEM; +} + +static int nb8800_dma_stop(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_buf *txb = &priv->tx_bufs[0]; + struct nb8800_tx_desc *txd = &priv->tx_descs[0]; + int retry = 5; + u32 txcr; + u32 rxcr; + int err; + unsigned int i; + + /* wait for tx to finish */ + err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr, + !(txcr & TCR_EN) && + priv->tx_done == priv->tx_next, + 1000, 1000000); + if (err) + return err; + + /* The rx DMA only stops if it reaches the end of chain. + * To make this happen, we set the EOC flag on all rx + * descriptors, put the device in loopback mode, and send + * a few dummy frames. The interrupt handler will ignore + * these since NAPI is disabled and no real frames are in + * the tx queue. + */ + + for (i = 0; i < RX_DESC_COUNT; i++) + priv->rx_descs[i].desc.config |= DESC_EOC; + + txd->desc[0].s_addr = + txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); + txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8; + memset(txd->buf, 0, sizeof(txd->buf)); + + nb8800_mac_af(dev, false); + nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN); + + do { + nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); + wmb(); + nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN); + + err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR, + rxcr, !(rxcr & RCR_EN), + 1000, 100000); + } while (err && --retry); + + nb8800_mac_af(dev, true); + nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN); + nb8800_dma_reset(dev); + + return retry ? 0 : -ETIMEDOUT; +} + +static void nb8800_pause_adv(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 adv = 0; + + if (!priv->phydev) + return; + + if (priv->pause_rx) + adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + if (priv->pause_tx) + adv ^= ADVERTISED_Asym_Pause; + + priv->phydev->supported |= adv; + priv->phydev->advertising |= adv; +} + +static int nb8800_open(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int err; + + /* clear any pending interrupts */ + nb8800_writel(priv, NB8800_RXC_SR, 0xf); + nb8800_writel(priv, NB8800_TXC_SR, 0xf); + + err = nb8800_dma_init(dev); + if (err) + return err; + + err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev); + if (err) + goto err_free_dma; + + nb8800_mac_rx(dev, true); + nb8800_mac_tx(dev, true); + + priv->phydev = of_phy_connect(dev, priv->phy_node, + nb8800_link_reconfigure, 0, + priv->phy_mode); + if (!priv->phydev) + goto err_free_irq; + + nb8800_pause_adv(dev); + + netdev_reset_queue(dev); + napi_enable(&priv->napi); + netif_start_queue(dev); + + nb8800_start_rx(dev); + phy_start(priv->phydev); + + return 0; + +err_free_irq: + free_irq(dev->irq, dev); +err_free_dma: + nb8800_dma_free(dev); + + return err; +} + +static int nb8800_stop(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + phy_stop(priv->phydev); + + netif_stop_queue(dev); + napi_disable(&priv->napi); + + nb8800_dma_stop(dev); + nb8800_mac_rx(dev, false); + nb8800_mac_tx(dev, false); + + phy_disconnect(priv->phydev); + priv->phydev = NULL; + + free_irq(dev->irq, dev); + + nb8800_dma_free(dev); + + return 0; +} + +static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + return phy_mii_ioctl(priv->phydev, rq, cmd); +} + +static const struct net_device_ops nb8800_netdev_ops = { + .ndo_open = nb8800_open, + .ndo_stop = nb8800_stop, + .ndo_start_xmit = nb8800_xmit, + .ndo_set_mac_address = nb8800_set_mac_address, + .ndo_set_rx_mode = nb8800_set_rx_mode, + .ndo_do_ioctl = nb8800_ioctl, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + +static int nb8800_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return phy_ethtool_gset(priv->phydev, cmd); +} + +static int nb8800_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return phy_ethtool_sset(priv->phydev, cmd); +} + +static int nb8800_nway_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return genphy_restart_aneg(priv->phydev); +} + +static void nb8800_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pp) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + pp->autoneg = priv->pause_aneg; + pp->rx_pause = priv->pause_rx; + pp->tx_pause = priv->pause_tx; +} + +static int nb8800_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pp) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + priv->pause_aneg = pp->autoneg; + priv->pause_rx = pp->rx_pause; + priv->pause_tx = pp->tx_pause; + + nb8800_pause_adv(dev); + + if (!priv->pause_aneg) + nb8800_pause_config(dev); + else if (priv->phydev) + phy_start_aneg(priv->phydev); + + return 0; +} + +static const char nb8800_stats_names[][ETH_GSTRING_LEN] = { + "rx_bytes_ok", + "rx_frames_ok", + "rx_undersize_frames", + "rx_fragment_frames", + "rx_64_byte_frames", + "rx_127_byte_frames", + "rx_255_byte_frames", + "rx_511_byte_frames", + "rx_1023_byte_frames", + "rx_max_size_frames", + "rx_oversize_frames", + "rx_bad_fcs_frames", + "rx_broadcast_frames", + "rx_multicast_frames", + "rx_control_frames", + "rx_pause_frames", + "rx_unsup_control_frames", + "rx_align_error_frames", + "rx_overrun_frames", + "rx_jabber_frames", + "rx_bytes", + "rx_frames", + + "tx_bytes_ok", + "tx_frames_ok", + "tx_64_byte_frames", + "tx_127_byte_frames", + "tx_255_byte_frames", + "tx_511_byte_frames", + "tx_1023_byte_frames", + "tx_max_size_frames", + "tx_oversize_frames", + "tx_broadcast_frames", + "tx_multicast_frames", + "tx_control_frames", + "tx_pause_frames", + "tx_underrun_frames", + "tx_single_collision_frames", + "tx_multi_collision_frames", + "tx_deferred_collision_frames", + "tx_late_collision_frames", + "tx_excessive_collision_frames", + "tx_bytes", + "tx_frames", + "tx_collisions", +}; + +#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names) + +static int nb8800_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return NB8800_NUM_STATS; + + return -EOPNOTSUPP; +} + +static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf) +{ + if (sset == ETH_SS_STATS) + memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names)); +} + +static u32 nb8800_read_stat(struct net_device *dev, int index) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + nb8800_writeb(priv, NB8800_STAT_INDEX, index); + + return nb8800_readl(priv, NB8800_STAT_DATA); +} + +static void nb8800_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *estats, u64 *st) +{ + unsigned int i; + u32 rx, tx; + + for (i = 0; i < NB8800_NUM_STATS / 2; i++) { + rx = nb8800_read_stat(dev, i); + tx = nb8800_read_stat(dev, i | 0x80); + st[i] = rx; + st[i + NB8800_NUM_STATS / 2] = tx; + } +} + +static const struct ethtool_ops nb8800_ethtool_ops = { + .get_settings = nb8800_get_settings, + .set_settings = nb8800_set_settings, + .nway_reset = nb8800_nway_reset, + .get_link = ethtool_op_get_link, + .get_pauseparam = nb8800_get_pauseparam, + .set_pauseparam = nb8800_set_pauseparam, + .get_sset_count = nb8800_get_sset_count, + .get_strings = nb8800_get_strings, + .get_ethtool_stats = nb8800_get_ethtool_stats, +}; + +static int nb8800_hw_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 val; + + val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS; + nb8800_writeb(priv, NB8800_TX_CTL1, val); + + /* Collision retry count */ + nb8800_writeb(priv, NB8800_TX_CTL2, 5); + + val = RX_PAD_STRIP | RX_AF_EN; + nb8800_writeb(priv, NB8800_RX_CTL, val); + + /* Chosen by fair dice roll */ + nb8800_writeb(priv, NB8800_RANDOM_SEED, 4); + + /* TX cycles per deferral period */ + nb8800_writeb(priv, NB8800_TX_SDP, 12); + + /* The following three threshold values have been + * experimentally determined for good results. + */ + + /* RX/TX FIFO threshold for partial empty (64-bit entries) */ + nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0); + + /* RX/TX FIFO threshold for partial full (64-bit entries) */ + nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255); + + /* Buffer size for transmit (64-bit entries) */ + nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64); + + /* Configure tx DMA */ + + val = nb8800_readl(priv, NB8800_TXC_CR); + val &= TCR_LE; /* keep endian setting */ + val |= TCR_DM; /* DMA descriptor mode */ + val |= TCR_RS; /* automatically store tx status */ + val |= TCR_DIE; /* interrupt on DMA chain completion */ + val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */ + val |= TCR_BTS(2); /* 32-byte bus transaction size */ + nb8800_writel(priv, NB8800_TXC_CR, val); + + /* TX complete interrupt after 10 ms or 7 frames (see above) */ + val = clk_get_rate(priv->clk) / 100; + nb8800_writel(priv, NB8800_TX_ITR, val); + + /* Configure rx DMA */ + + val = nb8800_readl(priv, NB8800_RXC_CR); + val &= RCR_LE; /* keep endian setting */ + val |= RCR_DM; /* DMA descriptor mode */ + val |= RCR_RS; /* automatically store rx status */ + val |= RCR_DIE; /* interrupt at end of DMA chain */ + val |= RCR_RFI(7); /* interrupt after 7 frames received */ + val |= RCR_BTS(2); /* 32-byte bus transaction size */ + nb8800_writel(priv, NB8800_RXC_CR, val); + + /* The rx interrupt can fire before the DMA has completed + * unless a small delay is added. 50 us is hopefully enough. + */ + priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000; + + /* In NAPI poll mode we want to disable interrupts, but the + * hardware does not permit this. Delay 10 ms instead. + */ + priv->rx_itr_poll = clk_get_rate(priv->clk) / 100; + + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); + + priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF; + + /* Flow control settings */ + + /* Pause time of 0.1 ms */ + val = 100000 / 512; + nb8800_writeb(priv, NB8800_PQ1, val >> 8); + nb8800_writeb(priv, NB8800_PQ2, val & 0xff); + + /* Auto-negotiate by default */ + priv->pause_aneg = true; + priv->pause_rx = true; + priv->pause_tx = true; + + nb8800_mc_init(dev, 0); + + return 0; +} + +static int nb8800_tangox_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 pad_mode = PAD_MODE_MII; + + switch (priv->phy_mode) { + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + pad_mode = PAD_MODE_MII; + break; + + case PHY_INTERFACE_MODE_RGMII: + pad_mode = PAD_MODE_RGMII; + break; + + case PHY_INTERFACE_MODE_RGMII_TXID: + pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; + break; + + default: + dev_err(dev->dev.parent, "unsupported phy mode %s\n", + phy_modes(priv->phy_mode)); + return -EINVAL; + } + + nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode); + + return 0; +} + +static int nb8800_tangox_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int clk_div; + + nb8800_writeb(priv, NB8800_TANGOX_RESET, 0); + usleep_range(1000, 10000); + nb8800_writeb(priv, NB8800_TANGOX_RESET, 1); + + wmb(); /* ensure reset is cleared before proceeding */ + + clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK); + nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div); + + return 0; +} + +static const struct nb8800_ops nb8800_tangox_ops = { + .init = nb8800_tangox_init, + .reset = nb8800_tangox_reset, +}; + +static int nb8800_tango4_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int err; + + err = nb8800_tangox_init(dev); + if (err) + return err; + + /* On tango4 interrupt on DMA completion per frame works and gives + * better performance despite generating more rx interrupts. + */ + + /* Disable unnecessary interrupt on rx completion */ + nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7)); + + /* Request interrupt on descriptor DMA completion */ + priv->rx_dma_config |= DESC_ID; + + return 0; +} + +static const struct nb8800_ops nb8800_tango4_ops = { + .init = nb8800_tango4_init, + .reset = nb8800_tangox_reset, +}; + +static const struct of_device_id nb8800_dt_ids[] = { + { + .compatible = "aurora,nb8800", + }, + { + .compatible = "sigma,smp8642-ethernet", + .data = &nb8800_tangox_ops, + }, + { + .compatible = "sigma,smp8734-ethernet", + .data = &nb8800_tango4_ops, + }, + { } +}; + +static int nb8800_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + const struct nb8800_ops *ops = NULL; + struct nb8800_priv *priv; + struct resource *res; + struct net_device *dev; + struct mii_bus *bus; + const unsigned char *mac; + void __iomem *base; + int irq; + int ret; + + match = of_match_device(nb8800_dt_ids, &pdev->dev); + if (match) + ops = match->data; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(&pdev->dev, "No IRQ\n"); + return -EINVAL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start); + + dev = alloc_etherdev(sizeof(*priv)); + if (!dev) + return -ENOMEM; + + platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + priv = netdev_priv(dev); + priv->base = base; + + priv->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (priv->phy_mode < 0) + priv->phy_mode = PHY_INTERFACE_MODE_RGMII; + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + ret = PTR_ERR(priv->clk); + goto err_free_dev; + } + + ret = clk_prepare_enable(priv->clk); + if (ret) + goto err_free_dev; + + spin_lock_init(&priv->tx_lock); + + if (ops && ops->reset) { + ret = ops->reset(dev); + if (ret) + goto err_free_dev; + } + + bus = devm_mdiobus_alloc(&pdev->dev); + if (!bus) { + ret = -ENOMEM; + goto err_disable_clk; + } + + bus->name = "nb8800-mii"; + bus->read = nb8800_mdio_read; + bus->write = nb8800_mdio_write; + bus->parent = &pdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii", + (unsigned long)res->start); + bus->priv = priv; + + ret = of_mdiobus_register(bus, pdev->dev.of_node); + if (ret) { + dev_err(&pdev->dev, "failed to register MII bus\n"); + goto err_disable_clk; + } + + priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!priv->phy_node) { + dev_err(&pdev->dev, "no PHY specified\n"); + ret = -ENODEV; + goto err_free_bus; + } + + priv->mii_bus = bus; + + ret = nb8800_hw_init(dev); + if (ret) + goto err_free_bus; + + if (ops && ops->init) { + ret = ops->init(dev); + if (ret) + goto err_free_bus; + } + + dev->netdev_ops = &nb8800_netdev_ops; + dev->ethtool_ops = &nb8800_ethtool_ops; + dev->flags |= IFF_MULTICAST; + dev->irq = irq; + + mac = of_get_mac_address(pdev->dev.of_node); + if (mac) + ether_addr_copy(dev->dev_addr, mac); + + if (!is_valid_ether_addr(dev->dev_addr)) + eth_hw_addr_random(dev); + + nb8800_update_mac_addr(dev); + + netif_carrier_off(dev); + + ret = register_netdev(dev); + if (ret) { + netdev_err(dev, "failed to register netdev\n"); + goto err_free_dma; + } + + netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT); + + netdev_info(dev, "MAC address %pM\n", dev->dev_addr); + + return 0; + +err_free_dma: + nb8800_dma_free(dev); +err_free_bus: + mdiobus_unregister(bus); +err_disable_clk: + clk_disable_unprepare(priv->clk); +err_free_dev: + free_netdev(dev); + + return ret; +} + +static int nb8800_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct nb8800_priv *priv = netdev_priv(ndev); + + unregister_netdev(ndev); + + mdiobus_unregister(priv->mii_bus); + + clk_disable_unprepare(priv->clk); + + nb8800_dma_free(ndev); + free_netdev(ndev); + + return 0; +} + +static struct platform_driver nb8800_driver = { + .driver = { + .name = "nb8800", + .of_match_table = nb8800_dt_ids, + }, + .probe = nb8800_probe, + .remove = nb8800_remove, +}; + +module_platform_driver(nb8800_driver); + +MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver"); +MODULE_AUTHOR("Mans Rullgard "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h new file mode 100644 index 000000000000..e5adbc2aac9f --- /dev/null +++ b/drivers/net/ethernet/aurora/nb8800.h @@ -0,0 +1,316 @@ +#ifndef _NB8800_H_ +#define _NB8800_H_ + +#include +#include +#include +#include +#include + +#define RX_DESC_COUNT 256 +#define TX_DESC_COUNT 256 + +#define NB8800_DESC_LOW 4 + +#define RX_BUF_SIZE 1552 + +#define RX_COPYBREAK 256 +#define RX_COPYHDR 128 + +#define MAX_MDC_CLOCK 2500000 + +/* Stargate Solutions SSN8800 core registers */ +#define NB8800_TX_CTL1 0x000 +#define TX_TPD BIT(5) +#define TX_APPEND_FCS BIT(4) +#define TX_PAD_EN BIT(3) +#define TX_RETRY_EN BIT(2) +#define TX_EN BIT(0) + +#define NB8800_TX_CTL2 0x001 + +#define NB8800_RX_CTL 0x004 +#define RX_BC_DISABLE BIT(7) +#define RX_RUNT BIT(6) +#define RX_AF_EN BIT(5) +#define RX_PAUSE_EN BIT(3) +#define RX_SEND_CRC BIT(2) +#define RX_PAD_STRIP BIT(1) +#define RX_EN BIT(0) + +#define NB8800_RANDOM_SEED 0x008 +#define NB8800_TX_SDP 0x14 +#define NB8800_TX_TPDP1 0x18 +#define NB8800_TX_TPDP2 0x19 +#define NB8800_SLOT_TIME 0x1c + +#define NB8800_MDIO_CMD 0x020 +#define MDIO_CMD_GO BIT(31) +#define MDIO_CMD_WR BIT(26) +#define MDIO_CMD_ADDR(x) ((x) << 21) +#define MDIO_CMD_REG(x) ((x) << 16) +#define MDIO_CMD_DATA(x) ((x) << 0) + +#define NB8800_MDIO_STS 0x024 +#define MDIO_STS_ERR BIT(31) + +#define NB8800_MC_ADDR(i) (0x028 + (i)) +#define NB8800_MC_INIT 0x02e +#define NB8800_UC_ADDR(i) (0x03c + (i)) + +#define NB8800_MAC_MODE 0x044 +#define RGMII_MODE BIT(7) +#define HALF_DUPLEX BIT(4) +#define BURST_EN BIT(3) +#define LOOPBACK_EN BIT(2) +#define GMAC_MODE BIT(0) + +#define NB8800_IC_THRESHOLD 0x050 +#define NB8800_PE_THRESHOLD 0x051 +#define NB8800_PF_THRESHOLD 0x052 +#define NB8800_TX_BUFSIZE 0x054 +#define NB8800_FIFO_CTL 0x056 +#define NB8800_PQ1 0x060 +#define NB8800_PQ2 0x061 +#define NB8800_SRC_ADDR(i) (0x06a + (i)) +#define NB8800_STAT_DATA 0x078 +#define NB8800_STAT_INDEX 0x07c +#define NB8800_STAT_CLEAR 0x07d + +#define NB8800_SLEEP_MODE 0x07e +#define SLEEP_MODE BIT(0) + +#define NB8800_WAKEUP 0x07f +#define WAKEUP BIT(0) + +/* Aurora NB8800 host interface registers */ +#define NB8800_TXC_CR 0x100 +#define TCR_LK BIT(12) +#define TCR_DS BIT(11) +#define TCR_BTS(x) (((x) & 0x7) << 8) +#define TCR_DIE BIT(7) +#define TCR_TFI(x) (((x) & 0x7) << 4) +#define TCR_LE BIT(3) +#define TCR_RS BIT(2) +#define TCR_DM BIT(1) +#define TCR_EN BIT(0) + +#define NB8800_TXC_SR 0x104 +#define TSR_DE BIT(3) +#define TSR_DI BIT(2) +#define TSR_TO BIT(1) +#define TSR_TI BIT(0) + +#define NB8800_TX_SAR 0x108 +#define NB8800_TX_DESC_ADDR 0x10c + +#define NB8800_TX_REPORT_ADDR 0x110 +#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff) +#define TX_FIRST_DEFERRAL BIT(7) +#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf) +#define TX_LATE_COLLISION BIT(2) +#define TX_PACKET_DROPPED BIT(1) +#define TX_FIFO_UNDERRUN BIT(0) +#define IS_TX_ERROR(r) ((r) & 0x07) + +#define NB8800_TX_FIFO_SR 0x114 +#define NB8800_TX_ITR 0x118 + +#define NB8800_RXC_CR 0x200 +#define RCR_FL BIT(13) +#define RCR_LK BIT(12) +#define RCR_DS BIT(11) +#define RCR_BTS(x) (((x) & 7) << 8) +#define RCR_DIE BIT(7) +#define RCR_RFI(x) (((x) & 7) << 4) +#define RCR_LE BIT(3) +#define RCR_RS BIT(2) +#define RCR_DM BIT(1) +#define RCR_EN BIT(0) + +#define NB8800_RXC_SR 0x204 +#define RSR_DE BIT(3) +#define RSR_DI BIT(2) +#define RSR_RO BIT(1) +#define RSR_RI BIT(0) + +#define NB8800_RX_SAR 0x208 +#define NB8800_RX_DESC_ADDR 0x20c + +#define NB8800_RX_REPORT_ADDR 0x210 +#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xFFFF) +#define RX_MULTICAST_PKT BIT(9) +#define RX_BROADCAST_PKT BIT(8) +#define RX_LENGTH_ERR BIT(7) +#define RX_FCS_ERR BIT(6) +#define RX_RUNT_PKT BIT(5) +#define RX_FIFO_OVERRUN BIT(4) +#define RX_LATE_COLLISION BIT(3) +#define RX_ALIGNMENT_ERROR BIT(2) +#define RX_ERROR_MASK 0xfc +#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK) + +#define NB8800_RX_FIFO_SR 0x214 +#define NB8800_RX_ITR 0x218 + +/* Sigma Designs SMP86xx additional registers */ +#define NB8800_TANGOX_PAD_MODE 0x400 +#define PAD_MODE_MASK 0x7 +#define PAD_MODE_MII 0x0 +#define PAD_MODE_RGMII 0x1 +#define PAD_MODE_GTX_CLK_INV BIT(3) +#define PAD_MODE_GTX_CLK_DELAY BIT(4) + +#define NB8800_TANGOX_MDIO_CLKDIV 0x420 +#define NB8800_TANGOX_RESET 0x424 + +/* Hardware DMA descriptor */ +struct nb8800_dma_desc { + u32 s_addr; /* start address */ + u32 n_addr; /* next descriptor address */ + u32 r_addr; /* report address */ + u32 config; +} __aligned(8); + +#define DESC_ID BIT(23) +#define DESC_EOC BIT(22) +#define DESC_EOF BIT(21) +#define DESC_LK BIT(20) +#define DESC_DS BIT(19) +#define DESC_BTS(x) (((x) & 0x7) << 16) + +/* DMA descriptor and associated data for rx. + * Allocated from coherent memory. + */ +struct nb8800_rx_desc { + /* DMA descriptor */ + struct nb8800_dma_desc desc; + + /* Status report filled in by hardware */ + u32 report; +}; + +/* Address of buffer on rx ring */ +struct nb8800_rx_buf { + struct page *page; + unsigned long offset; +}; + +/* DMA descriptors and associated data for tx. + * Allocated from coherent memory. + */ +struct nb8800_tx_desc { + /* DMA descriptor. The second descriptor is used if packet + * data is unaligned. + */ + struct nb8800_dma_desc desc[2]; + + /* Status report filled in by hardware */ + u32 report; + + /* Bounce buffer for initial unaligned part of packet */ + u8 buf[8] __aligned(8); +}; + +/* Packet in tx queue */ +struct nb8800_tx_buf { + /* Currently queued skb */ + struct sk_buff *skb; + + /* DMA address of the first descriptor */ + dma_addr_t dma_desc; + + /* DMA address of packet data */ + dma_addr_t dma_addr; + + /* Length of DMA mapping, less than skb->len if alignment + * buffer is used. + */ + unsigned int dma_len; + + /* Number of packets in chain starting here */ + unsigned int chain_len; + + /* Packet chain ready to be submitted to hardware */ + bool ready; +}; + +struct nb8800_priv { + struct napi_struct napi; + + void __iomem *base; + + /* RX DMA descriptors */ + struct nb8800_rx_desc *rx_descs; + + /* RX buffers referenced by DMA descriptors */ + struct nb8800_rx_buf *rx_bufs; + + /* Current end of chain */ + u32 rx_eoc; + + /* Value for rx interrupt time register in NAPI interrupt mode */ + u32 rx_itr_irq; + + /* Value for rx interrupt time register in NAPI poll mode */ + u32 rx_itr_poll; + + /* Value for config field of rx DMA descriptors */ + u32 rx_dma_config; + + /* TX DMA descriptors */ + struct nb8800_tx_desc *tx_descs; + + /* TX packet queue */ + struct nb8800_tx_buf *tx_bufs; + + /* Number of free tx queue entries */ + atomic_t tx_free; + + /* First free tx queue entry */ + u32 tx_next; + + /* Next buffer to transmit */ + u32 tx_queue; + + /* Start of current packet chain */ + struct nb8800_tx_buf *tx_chain; + + /* Next buffer to reclaim */ + u32 tx_done; + + /* Lock for DMA activation */ + spinlock_t tx_lock; + + struct mii_bus *mii_bus; + struct device_node *phy_node; + struct phy_device *phydev; + + /* PHY connection type from DT */ + int phy_mode; + + /* Current link status */ + int speed; + int duplex; + int link; + + /* Pause settings */ + bool pause_aneg; + bool pause_rx; + bool pause_tx; + + /* DMA base address of rx descriptors, see rx_descs above */ + dma_addr_t rx_desc_dma; + + /* DMA base address of tx descriptors, see tx_descs above */ + dma_addr_t tx_desc_dma; + + struct clk *clk; +}; + +struct nb8800_ops { + int (*init)(struct net_device *dev); + int (*reset)(struct net_device *dev); +}; + +#endif /* _NB8800_H_ */ From 9a65083827da0da5fadf9dea5bca25192073b112 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:12:50 -0500 Subject: [PATCH 037/110] tipc: correct settings of broadcast link state Since commit 5266698661401afc5e ("tipc: let broadcast packet reception use new link receive function") the broadcast send link state was meant to always be set to LINK_ESTABLISHED, since we don't need this link to follow the regular link FSM rules. It was also the intention that this state anyway shouldn't impact the run-time working state of the link, since the latter in reality is controlled by the number of registered peers. We have now discovered that this assumption is not quite correct. If the broadcast link is reset because of too many retransmissions, its state will inadvertently go to LINK_RESETTING, and never go back to LINK_ESTABLISHED, because the LINK_FAILURE event was not anticipated. This will work well once, but if it happens a second time, the reset on a link in LINK_RESETTING has has no effect, and neither the broadcast link nor the unicast links will go down as they should. Furthermore, it is confusing that the management tool shows that this link is in UP state when that obviously isn't the case. We now ensure that this state strictly follows the true working state of the link. The state is set to LINK_ESTABLISHED when the number of peers is non-zero, and to LINK_RESET otherwise. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 9efbdbde2b08..91aea071ab27 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -191,6 +191,7 @@ void tipc_link_add_bc_peer(struct tipc_link *snd_l, snd_l->ackers++; rcv_l->acked = snd_l->snd_nxt - 1; + snd_l->state = LINK_ESTABLISHED; tipc_link_build_bc_init_msg(uc_l, xmitq); } @@ -206,6 +207,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, rcv_l->state = LINK_RESET; if (!snd_l->ackers) { tipc_link_reset(snd_l); + snd_l->state = LINK_RESET; __skb_queue_purge(xmitq); } } From 6900317f5eff0a7070c5936e5383f589e0de7a09 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Nov 2015 00:11:56 +0100 Subject: [PATCH 038/110] net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds David and HacKurx reported a following/similar size overflow triggered in a grsecurity kernel, thanks to PaX's gcc size overflow plugin: (Already fixed in later grsecurity versions by Brad and PaX Team.) [ 1002.296137] PAX: size overflow detected in function scm_detach_fds net/core/scm.c:314 cicus.202_127 min, count: 4, decl: msg_controllen; num: 0; context: msghdr; [ 1002.296145] CPU: 0 PID: 3685 Comm: scm_rights_recv Not tainted 4.2.3-grsec+ #7 [ 1002.296149] Hardware name: Apple Inc. MacBookAir5,1/Mac-66F35F19FE2A0D05, [...] [ 1002.296153] ffffffff81c27366 0000000000000000 ffffffff81c27375 ffffc90007843aa8 [ 1002.296162] ffffffff818129ba 0000000000000000 ffffffff81c27366 ffffc90007843ad8 [ 1002.296169] ffffffff8121f838 fffffffffffffffc fffffffffffffffc ffffc90007843e60 [ 1002.296176] Call Trace: [ 1002.296190] [] dump_stack+0x45/0x57 [ 1002.296200] [] report_size_overflow+0x38/0x60 [ 1002.296209] [] scm_detach_fds+0x2ce/0x300 [ 1002.296220] [] unix_stream_read_generic+0x609/0x930 [ 1002.296228] [] unix_stream_recvmsg+0x4f/0x60 [ 1002.296236] [] ? unix_set_peek_off+0x50/0x50 [ 1002.296243] [] sock_recvmsg+0x47/0x60 [ 1002.296248] [] ___sys_recvmsg+0xe2/0x1e0 [ 1002.296257] [] __sys_recvmsg+0x46/0x80 [ 1002.296263] [] SyS_recvmsg+0x2c/0x40 [ 1002.296271] [] entry_SYSCALL_64_fastpath+0x12/0x85 Further investigation showed that this can happen when an *odd* number of fds are being passed over AF_UNIX sockets. In these cases CMSG_LEN(i * sizeof(int)) and CMSG_SPACE(i * sizeof(int)), where i is the number of successfully passed fds, differ by 4 bytes due to the extra CMSG_ALIGN() padding in CMSG_SPACE() to an 8 byte boundary on 64 bit. The padding is used to align subsequent cmsg headers in the control buffer. When the control buffer passed in from the receiver side *lacks* these 4 bytes (e.g. due to buggy/wrong API usage), then msg->msg_controllen will overflow in scm_detach_fds(): int cmlen = CMSG_LEN(i * sizeof(int)); <--- cmlen w/o tail-padding err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i * sizeof(int)); <--- cmlen w/ 4 byte extra tail-padding msg->msg_control += cmlen; msg->msg_controllen -= cmlen; <--- iff no tail-padding space here ... } ... wrap-around F.e. it will wrap to a length of 18446744073709551612 bytes in case the receiver passed in msg->msg_controllen of 20 bytes, and the sender properly transferred 1 fd to the receiver, so that its CMSG_LEN results in 20 bytes and CMSG_SPACE in 24 bytes. In case of MSG_CMSG_COMPAT (scm_detach_fds_compat()), I haven't seen an issue in my tests as alignment seems always on 4 byte boundary. Same should be in case of native 32 bit, where we end up with 4 byte boundaries as well. In practice, passing msg->msg_controllen of 20 to recvmsg() while receiving a single fd would mean that on successful return, msg->msg_controllen is being set by the kernel to 24 bytes instead, thus more than the input buffer advertised. It could f.e. become an issue if such application later on zeroes or copies the control buffer based on the returned msg->msg_controllen elsewhere. Maximum number of fds we can send is a hard upper limit SCM_MAX_FD (253). Going over the code, it seems like msg->msg_controllen is not being read after scm_detach_fds() in scm_recv() anymore by the kernel, good! Relevant recvmsg() handler are unix_dgram_recvmsg() (unix_seqpacket_recvmsg()) and unix_stream_recvmsg(). Both return back to their recvmsg() caller, and ___sys_recvmsg() places the updated length, that is, new msg_control - old msg_control pointer into msg->msg_controllen (hence the 24 bytes seen in the example). Long time ago, Wei Yongjun fixed something related in commit 1ac70e7ad24a ("[NET]: Fix function put_cmsg() which may cause usr application memory overflow"). RFC3542, section 20.2. says: The fields shown as "XX" are possible padding, between the cmsghdr structure and the data, and between the data and the next cmsghdr structure, if required by the implementation. While sending an application may or may not include padding at the end of last ancillary data in msg_controllen and implementations must accept both as valid. On receiving a portable application must provide space for padding at the end of the last ancillary data as implementations may copy out the padding at the end of the control message buffer and include it in the received msg_controllen. When recvmsg() is called if msg_controllen is too small for all the ancillary data items including any trailing padding after the last item an implementation may set MSG_CTRUNC. Since we didn't place MSG_CTRUNC for already quite a long time, just do the same as in 1ac70e7ad24a to avoid an overflow. Btw, even man-page author got this wrong :/ See db939c9b26e9 ("cmsg.3: Fix error in SCM_RIGHTS code sample"). Some people must have copied this (?), thus it got triggered in the wild (reported several times during boot by David and HacKurx). No Fixes tag this time as pre 2002 (that is, pre history tree). Reported-by: David Sterba Reported-by: HacKurx Cc: PaX Team Cc: Emese Revfy Cc: Brad Spengler Cc: Wei Yongjun Cc: Eric Dumazet Reviewed-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/scm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/scm.c b/net/core/scm.c index 3b6899b7d810..8a1741b14302 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i*sizeof(int)); + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } From cf869eb1118fac333b26585bce61f862d0e2b3eb Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Thu, 19 Nov 2015 17:56:12 -0800 Subject: [PATCH 039/110] net: tulip: update MAINTAINER status to Orphan I haven't had any PCI tulip HW for the past ~5 years. I have been reviewing tulip patches and can continue doing that. Signed-off-by: Grant Grundler Acked-by: Helge Deller Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea1751283b49..ec0706199efb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10888,9 +10888,9 @@ S: Maintained F: drivers/media/tuners/tua9001* TULIP NETWORK DRIVERS -M: Grant Grundler L: netdev@vger.kernel.org -S: Maintained +L: linux-parisc@vger.kernel.org +S: Orphan F: drivers/net/ethernet/dec/tulip/ TUN/TAP driver From 0e615e9601a15efeeb8942cf7cd4dadba0c8c5a7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Nov 2015 13:54:19 +0100 Subject: [PATCH 040/110] net: ipmr: fix static mfc/dev leaks on table destruction When destroying an mrt table the static mfc entries and the static devices are kept, which leads to devices that can never be destroyed (because of refcnt taken) and leaked memory, for example: unreferenced object 0xffff880034c144c0 (size 192): comm "mfc-broken", pid 4777, jiffies 4320349055 (age 46001.964s) hex dump (first 32 bytes): 98 53 f0 34 00 88 ff ff 98 53 f0 34 00 88 ff ff .S.4.....S.4.... ef 0a 0a 14 01 02 03 04 00 00 00 00 01 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0x190/0x300 [] ip_mroute_setsockopt+0x5cb/0x910 [] do_ip_setsockopt.isra.11+0x105/0xff0 [] ip_setsockopt+0x30/0xa0 [] raw_setsockopt+0x33/0x90 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xc0 [] entry_SYSCALL_64_fastpath+0x16/0x7a [] 0xffffffffffffffff Make sure that everything is cleaned on netns destruction. Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b74d513..292123bc30fa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); -static void mroute_clean_tables(struct mr_table *mrt); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1208,7 +1208,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1217,8 +1217,9 @@ static void mroute_clean_tables(struct mr_table *mrt) /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif_table[i].flags & VIFF_STATIC)) - vif_delete(mrt, i, 0, &list); + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) + continue; + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1226,7 +1227,7 @@ static void mroute_clean_tables(struct mr_table *mrt) for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); @@ -1261,7 +1262,7 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); } } rtnl_unlock(); From 4c6980462f32b4f282c5d8e5f7ea8070e2937725 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Nov 2015 13:54:20 +0100 Subject: [PATCH 041/110] net: ip6mr: fix static mfc/dev leaks on table destruction Similar to ipv4, when destroying an mrt table the static mfc entries and the static devices are kept, which leads to devices that can never be destroyed (because of refcnt taken) and leaked memory. Make sure that everything is cleaned up on netns destruction. Fixes: 8229efdaef1e ("netns: ip6mr: enable namespace support in ipv6 multicast forwarding code") CC: Benjamin Thery Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index ad19136086dd..7a4a1b81dbb6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, int cmd); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt); +static void mroute_clean_tables(struct mr6_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1542,7 +1542,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt) +static void mroute_clean_tables(struct mr6_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1552,8 +1552,9 @@ static void mroute_clean_tables(struct mr6_table *mrt) * Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(mrt, i, &list); + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + continue; + mif6_delete(mrt, i, &list); } unregister_netdevice_many(&list); @@ -1562,7 +1563,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) */ for (i = 0; i < MFC6_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; write_lock_bh(&mrt_lock); list_del(&c->list); @@ -1625,7 +1626,7 @@ int ip6mr_sk_done(struct sock *sk) net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); err = 0; break; } From 7cecd9ab80f43972c056dc068338f7bcc407b71c Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Tue, 10 Nov 2015 14:59:34 +0100 Subject: [PATCH 042/110] can: sja1000: clear interrupts on start According to SJA1000 data sheet error-warning (EI) interrupt is not cleared by setting the controller in to reset-mode. Then if we have the following case: - system is suspended (echo mem > /sys/power/state) and SJA1000 is left in operating state - A bus error condition occurs which activates EI interrupt, system is still suspended which means EI interrupt will be not be handled nor cleared. If the above two events occur, on resume there is no way to return the SJA1000 to operating state, except to cycle power to it. By simply reading the IR register on start we will clear any previous conditions that could be present. Signed-off-by: Mirza Krak Reported-by: Christian Magnusson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 7b92e911a616..f10834be48a5 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev) priv->write_reg(priv, SJA1000_RXERR, 0x0); priv->read_reg(priv, SJA1000_ECC); + /* clear interrupt flags */ + priv->read_reg(priv, SJA1000_IR); + /* leave reset mode */ set_normal_mode(dev); } From ffd461f80d536336811d573f197f3e6d9872d054 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 21 Nov 2015 18:41:20 +0100 Subject: [PATCH 043/110] can: fix assignment of error location in CAN error messages As Dan Carpenter reported in http://marc.info/?l=linux-can&m=144793696016187 the assignment of the error location in CAN error messages had some bit wise overlaps. Indeed the value to be assigned in data[3] is no bitfield but defines a single value which points to a location inside the CAN frame on the wire. This patch fixes the assignments for the error locations in error messages. Reported-by: Dan Carpenter Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 6 ++---- drivers/net/can/cc770/cc770.c | 2 +- drivers/net/can/flexcan.c | 4 ++-- drivers/net/can/m_can/m_can.c | 6 ++---- drivers/net/can/pch_can.c | 3 +-- drivers/net/can/rcar_can.c | 6 +++--- drivers/net/can/ti_hecc.c | 6 ++---- drivers/net/can/usb/kvaser_usb.c | 5 ++--- drivers/net/can/usb/usb_8dev.c | 3 +-- drivers/net/can/xilinx_can.c | 5 ++--- 10 files changed, 18 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 5d214d135332..7c9892ab0a6a 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -975,8 +975,7 @@ static int c_can_handle_bus_err(struct net_device *dev, break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK | - CAN_ERR_PROT_LOC_ACK_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_ACK; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); @@ -988,8 +987,7 @@ static int c_can_handle_bus_err(struct net_device *dev, break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; default: break; diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 70a8cbb29e75..1e37313054f3 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -578,7 +578,7 @@ static int cc770_err(struct net_device *dev, u8 status) cf->data[2] |= CAN_ERR_PROT_BIT0; break; case STAT_LEC_CRC: - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; } } diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 868fe945e35a..41c0fc9f3b14 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -535,13 +535,13 @@ static void do_bus_err(struct net_device *dev, if (reg_esr & FLEXCAN_ESR_ACK_ERR) { netdev_dbg(dev, "ACK_ERR irq\n"); cf->can_id |= CAN_ERR_ACK; - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; tx_errors = 1; } if (reg_esr & FLEXCAN_ESR_CRC_ERR) { netdev_dbg(dev, "CRC_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_BIT; - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; rx_errors = 1; } if (reg_esr & FLEXCAN_ESR_FRM_ERR) { diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ef655177bb5e..9dd3ca7a73aa 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -500,8 +500,7 @@ static int m_can_handle_lec_err(struct net_device *dev, break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK | - CAN_ERR_PROT_LOC_ACK_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_ACK; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); @@ -513,8 +512,7 @@ static int m_can_handle_lec_err(struct net_device *dev, break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; default: break; diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index e187ca783da0..c1317889d3d8 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -559,8 +559,7 @@ static void pch_can_error(struct net_device *ndev, u32 status) stats->rx_errors++; break; case PCH_CRC_ERR: - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; priv->can.can_stats.bus_error++; stats->rx_errors++; break; diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7bd54191f962..9161f045d44c 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -251,7 +251,7 @@ static void rcar_can_error(struct net_device *ndev) tx_errors++; writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr); if (skb) - cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_ACK_DEL; } if (ecsr & RCAR_CAN_ECSR_BE0F) { netdev_dbg(priv->ndev, "Bit Error (dominant)\n"); @@ -272,7 +272,7 @@ static void rcar_can_error(struct net_device *ndev) rx_errors++; writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr); if (skb) - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } if (ecsr & RCAR_CAN_ECSR_AEF) { netdev_dbg(priv->ndev, "ACK Error\n"); @@ -280,7 +280,7 @@ static void rcar_can_error(struct net_device *ndev) writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr); if (skb) { cf->can_id |= CAN_ERR_ACK; - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; } } if (ecsr & RCAR_CAN_ECSR_FEF) { diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index cf345cbfe819..6eab4fe7e872 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -737,13 +737,11 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, } if (err_status & HECC_CANES_CRCE) { hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE); - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } if (err_status & HECC_CANES_ACKE) { hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE); - cf->data[3] |= CAN_ERR_PROT_LOC_ACK | - CAN_ERR_PROT_LOC_ACK_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; } } diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 8b17a9065b0b..022bfa13ebfa 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -944,10 +944,9 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; if (es->leaf.error_factor & M16C_EF_ACKE) - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK); + cf->data[3] = CAN_ERR_PROT_LOC_ACK; if (es->leaf.error_factor & M16C_EF_CRCE) - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; if (es->leaf.error_factor & M16C_EF_FORME) cf->data[2] |= CAN_ERR_PROT_FORM; if (es->leaf.error_factor & M16C_EF_STFE) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index de95b1ccba3e..017ae5002169 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -402,8 +402,7 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv, break; case USB_8DEV_STATUSMSG_CRC: cf->data[2] |= CAN_ERR_PROT_UNSPEC; - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; rx_errors = 1; break; case USB_8DEV_STATUSMSG_BIT0: diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index fc55e8e0351d..4c57ddf12684 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -618,7 +618,7 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) stats->tx_errors++; if (skb) { cf->can_id |= CAN_ERR_ACK; - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; } } @@ -654,8 +654,7 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) stats->rx_errors++; if (skb) { cf->can_id |= CAN_ERR_PROT; - cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } } priv->can.can_stats.bus_error++; From a2ec19f888f1fb06e2424486423a16f86ad1fcc4 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 21 Nov 2015 18:41:21 +0100 Subject: [PATCH 044/110] can: remove obsolete assignment for CAN protocol error type The assignment 'cf->data[2] |= CAN_ERR_PROT_UNSPEC' used at CAN error message creation time is obsolete as CAN_ERR_PROT_UNSPEC is zero and cf->data[2] is initialized with zero in alloc_can_err_skb() anyway. So we could either assign 'cf->data[2] = CAN_ERR_PROT_UNSPEC' correctly or we can remove the obsolete OR operation entirely. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/bfin_can.c | 2 -- drivers/net/can/c_can/c_can.c | 1 - drivers/net/can/janz-ican3.c | 1 - drivers/net/can/m_can/m_can.c | 1 - drivers/net/can/rcar_can.c | 5 ++--- drivers/net/can/sja1000/sja1000.c | 1 - drivers/net/can/sun4i_can.c | 1 - drivers/net/can/ti_hecc.c | 1 - drivers/net/can/usb/ems_usb.c | 1 - drivers/net/can/usb/esd_usb2.c | 1 - drivers/net/can/usb/usb_8dev.c | 1 - drivers/net/can/xilinx_can.c | 4 +--- 12 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 57dadd52b428..1deb8ff90a89 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -501,8 +501,6 @@ static int bfin_can_err(struct net_device *dev, u16 isrc, u16 status) cf->data[2] |= CAN_ERR_PROT_FORM; else if (status & SER) cf->data[2] |= CAN_ERR_PROT_STUFF; - else - cf->data[2] |= CAN_ERR_PROT_UNSPEC; } priv->can.state = state; diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 7c9892ab0a6a..f91b094288da 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -962,7 +962,6 @@ static int c_can_handle_bus_err(struct net_device *dev, * type of the last error to occur on the CAN bus */ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; switch (lec_type) { case LEC_STUFF_ERROR: diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index c1e85368a198..5d04f5464faf 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1096,7 +1096,6 @@ static int ican3_handle_cevtind(struct ican3_dev *mod, struct ican3_msg *msg) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & ECC_SEG; break; } diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 9dd3ca7a73aa..39cf911f7a1e 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -487,7 +487,6 @@ static int m_can_handle_lec_err(struct net_device *dev, * type of the last error to occur on the CAN bus */ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; switch (lec_type) { case LEC_STUFF_ERROR: diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 9161f045d44c..bc46be39549d 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -241,10 +241,9 @@ static void rcar_can_error(struct net_device *ndev) u8 ecsr; netdev_dbg(priv->ndev, "Bus error interrupt:\n"); - if (skb) { + if (skb) cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; - cf->data[2] = CAN_ERR_PROT_UNSPEC; - } + ecsr = readb(&priv->regs->ecsr); if (ecsr & RCAR_CAN_ECSR_ADEF) { netdev_dbg(priv->ndev, "ACK Delimiter Error\n"); diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index f10834be48a5..8dda3b703d39 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -449,7 +449,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & ECC_SEG; break; } diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index d9a42c646783..68ef0a4cd821 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -575,7 +575,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = (ecc & SUN4I_STA_ERR_SEG_CODE) >> 16; break; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 6eab4fe7e872..680d1ff07a55 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -722,7 +722,6 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, if (err_status & HECC_BUS_ERROR) { ++priv->can.can_stats.bus_error; cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; if (err_status & HECC_CANES_FE) { hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE); cf->data[2] |= CAN_ERR_PROT_FORM; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 2d390384ef3b..fc5b75675cd8 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -377,7 +377,6 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & SJA1000_ECC_SEG; break; } diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 0e5a4493ba4f..113e64fcd73b 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -282,7 +282,6 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & SJA1000_ECC_SEG; break; } diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 017ae5002169..a731720f1d13 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -401,7 +401,6 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv, tx_errors = 1; break; case USB_8DEV_STATUSMSG_CRC: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; rx_errors = 1; break; diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 4c57ddf12684..51670b322409 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -608,10 +608,8 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) /* Check for error interrupt */ if (isr & XCAN_IXR_ERROR_MASK) { - if (skb) { + if (skb) cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; - } /* Check for Ack error interrupt */ if (err_status & XCAN_ESR_ACKER_MASK) { From fe761bcb9046029dbdb277de41e40c1c5ad0cf8c Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Fri, 20 Nov 2015 11:54:08 +0800 Subject: [PATCH 045/110] net: fsl: expands dependencies of NET_VENDOR_FREESCALE Freescale hosts some ARMv8 based SoCs, and a generic convention ARCH_LAYERSCAPE is used to cover such SoCs. Adding ARCH_LAYERSCAPE to dependencies of NET_VENDOR_FREESCALE to support networking on those SoCs. The ARCH_LAYERSCAPE is introduced by: commit: 53a5fde05 arm64: Use generic Layerscape SoC family naming Signed-off-by: Shaohui Xie Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index ff76d4e9dc1b..bee32a9d9876 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -7,7 +7,8 @@ config NET_VENDOR_FREESCALE default y depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \ M523x || M527x || M5272 || M528x || M520x || M532x || \ - ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) + ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \ + ARCH_LAYERSCAPE ---help--- If you have a network (Ethernet) card belonging to this class, say Y. From 3b13758f51de30618d9c7f3fc174d8d1a3cb13cd Mon Sep 17 00:00:00 2001 From: Nina Schiff Date: Fri, 20 Nov 2015 12:31:39 -0800 Subject: [PATCH 046/110] cgroups: Allow dynamically changing net_classid The classid of a process is changed either when a process is moved to or from a cgroup or when the net_cls.classid file is updated. Previously net_cls only supported propogating these changes to the cgroup's related sockets when a process was added or removed from the cgroup. This means it was neccessary to remove and re-add all processes to a cgroup in order to update its classid. This change introduces support for doing this dynamically - i.e. when the value is changed in the net_cls_classid file, this will also trigger an update to the classid associated with all sockets controlled by the cgroup. This mimics the behaviour of other cgroup subsystems. net_prio circumvents this issue by storing an index into a table with each socket (and so any updates to the table, don't require updating the value associated with the socket). net_cls, however, passes the socket the classid directly, and so this additional step is needed. Signed-off-by: Nina Schiff Acked-by: Tejun Heo Signed-off-by: David S. Miller --- net/core/netclassid_cgroup.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6441f47b1a8f..2e4df84c34a1 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -56,7 +56,7 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } -static int update_classid(const void *v, struct file *file, unsigned n) +static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); @@ -67,18 +67,25 @@ static int update_classid(const void *v, struct file *file, unsigned n) return 0; } +static void update_classid(struct cgroup_subsys_state *css, void *v) +{ + struct css_task_iter it; + struct task_struct *p; + + css_task_iter_start(css, &it); + while ((p = css_task_iter_next(&it))) { + task_lock(p); + iterate_fd(p->files, 0, update_classid_sock, v); + task_unlock(p); + } + css_task_iter_end(&it); +} + static void cgrp_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) { - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; - struct task_struct *p; - - cgroup_taskset_for_each(p, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid, v); - task_unlock(p); - } + update_classid(css, + (void *)(unsigned long)css_cls_state(css)->classid); } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) @@ -89,8 +96,11 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { - css_cls_state(css)->classid = (u32) value; + struct cgroup_cls_state *cs = css_cls_state(css); + cs->classid = (u32)value; + + update_classid(css, (void *)(unsigned long)cs->classid); return 0; } From 7d267278a9ece963d77eefec61630223fce08c6c Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Fri, 20 Nov 2015 22:07:23 +0000 Subject: [PATCH 047/110] unix: avoid use-after-free in ep_remove_wait_queue Rainer Weikusat writes: An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of this socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the message presently enqueued on the server receive queue were sent by them. In order to ensure that these will be woken up once space becomes again available, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received). This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed despite "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There's no way to forcibly deregister a wait queue with epoll. Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer receive queue full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via wake function. The connection to the peer wait queue is again dissolved if either a wake up is about to be relayed or the client socket reconnects or a dead peer is detected or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever. Signed-off-by: Rainer Weikusat Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") Reviewed-by: Jason Baron Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 183 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 165 insertions(+), 19 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b36d837c701e..2a91a0561a47 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,6 +62,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; + wait_queue_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec152cb71..4e95bdf973d9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,6 +326,118 @@ found: return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. + */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && @@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1033,6 +1148,8 @@ restart: if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1472,6 +1589,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; int max_level; int data_len = 0; + int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1550,12 +1668,14 @@ restart: goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1563,10 +1683,14 @@ restart: unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1592,21 +1716,38 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; + } + + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } + + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { err = -EAGAIN; + sk_locked = 1; goto out_unlock; } - timeo = unix_wait_for_peer(other, timeo); - - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; - - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1620,6 +1761,8 @@ restart: return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -2476,14 +2619,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) From 3d1a54e801b661fdc7a409cfc350b6ee555e00fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 21 Nov 2015 13:34:12 +0300 Subject: [PATCH 048/110] net/hsr: fix a warning message WARN_ON_ONCE() takes a condition, it doesn't take an error message. I have converted this to WARN() instead. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 35a9788bb3ae..c7d1adca30d8 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -312,7 +312,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) return; out: - WARN_ON_ONCE("HSR: Could not send supervision frame\n"); + WARN_ONCE(1, "HSR: Could not send supervision frame\n"); kfree_skb(skb); } From 7f109f7cc37108cba7243bc832988525b0d85909 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 19:46:19 +0100 Subject: [PATCH 049/110] vrf: fix double free and memory corruption on register_netdevice failure When vrf's ->newlink is called, if register_netdevice() fails then it does free_netdev(), but that's also done by rtnl_newlink() so a second free happens and memory gets corrupted, to reproduce execute the following line a couple of times (1 - 5 usually is enough): $ for i in `seq 1 5`; do ip link add vrf: type vrf table 1; done; This works because we fail in register_netdevice() because of the wrong name "vrf:". And here's a trace of one crash: [ 28.792157] ------------[ cut here ]------------ [ 28.792407] kernel BUG at fs/namei.c:246! [ 28.792608] invalid opcode: 0000 [#1] SMP [ 28.793240] Modules linked in: vrf nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace sunrpc crct10dif_pclmul crc32_pclmul crc32c_intel qxl drm_kms_helper ttm drm aesni_intel aes_x86_64 psmouse glue_helper lrw evdev gf128mul i2c_piix4 ablk_helper cryptd ppdev parport_pc parport serio_raw pcspkr virtio_balloon virtio_console i2c_core acpi_cpufreq button 9pnet_virtio 9p 9pnet fscache ipv6 autofs4 ext4 crc16 mbcache jbd2 virtio_blk virtio_net sg sr_mod cdrom ata_generic ehci_pci uhci_hcd ehci_hcd e1000 usbcore usb_common ata_piix libata virtio_pci virtio_ring virtio scsi_mod floppy [ 28.796016] CPU: 0 PID: 1148 Comm: ld-linux-x86-64 Not tainted 4.4.0-rc1+ #24 [ 28.796016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 28.796016] task: ffff8800352561c0 ti: ffff88003592c000 task.ti: ffff88003592c000 [ 28.796016] RIP: 0010:[] [] putname+0x43/0x60 [ 28.796016] RSP: 0018:ffff88003592fe88 EFLAGS: 00010246 [ 28.796016] RAX: 0000000000000000 RBX: ffff8800352561c0 RCX: 0000000000000001 [ 28.796016] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003784f000 [ 28.796016] RBP: ffff88003592ff08 R08: 0000000000000001 R09: 0000000000000000 [ 28.796016] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [ 28.796016] R13: 000000000000047c R14: ffff88003784f000 R15: ffff8800358c4a00 [ 28.796016] FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 28.796016] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 28.796016] CR2: 00007ffd583bc2d9 CR3: 0000000035a99000 CR4: 00000000000406f0 [ 28.796016] Stack: [ 28.796016] ffffffff8121045d ffffffff812102d3 ffff8800352561c0 ffff880035a91660 [ 28.796016] ffff8800008a9880 0000000000000000 ffffffff81a49940 00ffffff81218684 [ 28.796016] ffff8800352561c0 000000000000047c 0000000000000000 ffff880035b36d80 [ 28.796016] Call Trace: [ 28.796016] [] ? do_execveat_common.isra.34+0x74d/0x930 [ 28.796016] [] ? do_execveat_common.isra.34+0x5c3/0x930 [ 28.796016] [] do_execve+0x2c/0x30 [ 28.796016] [] call_usermodehelper_exec_async+0xf0/0x140 [ 28.796016] [] ? umh_complete+0x40/0x40 [ 28.796016] [] ret_from_fork+0x3f/0x70 [ 28.796016] Code: 48 8d 47 1c 48 89 e5 53 48 8b 37 48 89 fb 48 39 c6 74 1a 48 8b 3d 7e e9 8f 00 e8 49 fa fc ff 48 89 df e8 f1 01 fd ff 5b 5d f3 c3 <0f> 0b 48 89 fe 48 8b 3d 61 e9 8f 00 e8 2c fa fc ff 5b 5d eb e9 [ 28.796016] RIP [] putname+0x43/0x60 [ 28.796016] RSP Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: Nikolay Aleksandrov Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 92fa3e1ea65c..4f9748457f5a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -907,7 +907,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_vrf *vrf = netdev_priv(dev); - int err; if (!data || !data[IFLA_VRF_TABLE]) return -EINVAL; @@ -916,15 +915,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_L3MDEV_MASTER; - err = register_netdevice(dev); - if (err < 0) - goto out_fail; - - return 0; - -out_fail: - free_netdev(dev); - return err; + return register_netdevice(dev); } static size_t vrf_nl_getsize(const struct net_device *dev) From 3c25a860d17b7378822f35d8c9141db9507e3beb Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 22 Nov 2015 01:08:54 +0200 Subject: [PATCH 050/110] broadcom: fix PHY_ID_BCM5481 entry in the id table Commit fcb26ec5b18d ("broadcom: move all PHY_ID's to header") updated broadcom_tbl to use PHY_IDs, but incorrectly replaced 0x0143bca0 with PHY_ID_BCM5482 (making a duplicate entry, and completely omitting the original). Fix that. Fixes: fcb26ec5b18d ("broadcom: move all PHY_ID's to header") Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 07a6119121c3..3ce5d9514623 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -614,7 +614,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5461, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, - { PHY_ID_BCM5482, 0xfffffff0 }, + { PHY_ID_BCM5481, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, From f4195d1eac954a67adf112dd53404560cc55b942 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sun, 22 Nov 2015 15:46:05 +0800 Subject: [PATCH 051/110] tipc: avoid packets leaking on socket receive queue Even if we drain receive queue thoroughly in tipc_release() after tipc socket is removed from rhashtable, it is possible that some packets are in flight because some CPU runs receiver and did rhashtable lookup before we removed socket. They will achieve receive queue, but nobody delete them at all. To avoid this leak, we register a private socket destructor to purge receive queue, meaning releasing packets pending on receive queue will be delayed until the last reference of tipc socket will be released. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 552dbaba9cf3..b53246fb0412 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -105,6 +105,7 @@ struct tipc_sock { static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); +static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); @@ -381,6 +382,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; + sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); @@ -470,9 +472,6 @@ static int tipc_release(struct socket *sock) tipc_node_remove_conn(net, dnode, tsk->portid); } - /* Discard any remaining (connection-based) messages in receive queue */ - __skb_queue_purge(&sk->sk_receive_queue); - /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); @@ -1515,6 +1514,11 @@ static void tipc_data_ready(struct sock *sk) rcu_read_unlock(); } +static void tipc_sock_destruct(struct sock *sk) +{ + __skb_queue_purge(&sk->sk_receive_queue); +} + /** * filter_connect - Handle all incoming messages for a connection-based socket * @tsk: TIPC socket From 7098356baca723513e97ca0020df4e18bc353be3 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 24 Nov 2015 13:57:57 +0800 Subject: [PATCH 052/110] tipc: fix error handling of expanding buffer headroom Coverity says: *** CID 1338065: Error handling issues (CHECKED_RETURN) /net/tipc/udp_media.c: 162 in tipc_udp_send_msg() 156 struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; 157 struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; 158 struct sk_buff *clone; 159 struct rtable *rt; 160 161 if (skb_headroom(skb) < UDP_MIN_HEADROOM) >>> CID 1338065: Error handling issues (CHECKED_RETURN) >>> Calling "pskb_expand_head" without checking return value (as is done elsewhere 51 out of 56 times). 162 pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); 163 164 clone = skb_clone(skb, GFP_ATOMIC); 165 skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); 166 ub = rcu_dereference_rtnl(b->media_ptr); 167 if (!ub) { When expanding buffer headroom over udp tunnel with pskb_expand_head(), it's unfortunate that we don't check its return value. As a result, if the function returns an error code due to the lack of memory, it may cause unpredictable consequence as we unconditionally consider that it's always successful. Fixes: e53567948f82 ("tipc: conditionally expand buffer headroom over udp tunnel") Reported-by: Cc: Stephen Hemminger Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ad2719ad4c1b..70c03271b798 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -158,8 +158,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct rtable *rt; - if (skb_headroom(skb) < UDP_MIN_HEADROOM) - pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto tx_error; + } skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); From 6527f833bf3fa34ed53e10b8010760fff42169f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 23 Nov 2015 14:32:10 +0100 Subject: [PATCH 053/110] net: cdc_ncm: fix NULL pointer deref in cdc_ncm_bind_common MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 77b0a099674a ("cdc-ncm: use common parser") added a dangerous new trust in the CDC functional descriptors presented by the device, unconditionally assuming that any device handled by the driver has a CDC Union descriptor. This descriptor is required by the NCM and MBIM specs, but crashing on non-compliant devices is still unacceptable. Not only will that allow malicious devices to crash the kernel, but in this case it is also well known that there are non-compliant real devices on the market - as shown by the comment accompanying the IAD workaround in the same function. The Sierra Wireless EM7305 is an example of such device, having a CDC header and a CDC MBIM descriptor but no CDC Union: Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 12 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 14 bInterfaceProtocol 0 iInterface 0 CDC Header: bcdCDC 1.10 CDC MBIM: bcdMBIMVersion 1.00 wMaxControlMessage 4096 bNumberFilters 16 bMaxFilterSize 128 wMaxSegmentSize 4064 bmNetworkCapabilities 0x20 8-byte ntb input size Endpoint Descriptor: .. The conversion to a common parser also left the local cdc_union variable untouched. This caused the IAD workaround code to be applied to all devices with an IAD descriptor, which was never intended. Finish the conversion by testing for hdr.usb_cdc_union_desc instead. Cc: Oliver Neukum Fixes: 77b0a099674a ("cdc-ncm: use common parser") Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index a187f08113ec..3b1ba8237768 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -691,7 +691,6 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { - const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; struct usb_driver *driver; u8 *buf; @@ -725,15 +724,16 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* parse through descriptors associated with control interface */ cdc_parse_cdc_header(&hdr, intf, buf, len); - ctx->data = usb_ifnum_to_if(dev->udev, - hdr.usb_cdc_union_desc->bSlaveInterface0); + if (hdr.usb_cdc_union_desc) + ctx->data = usb_ifnum_to_if(dev->udev, + hdr.usb_cdc_union_desc->bSlaveInterface0); ctx->ether_desc = hdr.usb_cdc_ether_desc; ctx->func_desc = hdr.usb_cdc_ncm_desc; ctx->mbim_desc = hdr.usb_cdc_mbim_desc; ctx->mbim_extended_desc = hdr.usb_cdc_mbim_extended_desc; /* some buggy devices have an IAD but no CDC Union */ - if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { + if (!hdr.usb_cdc_union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1); dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n"); } From aeb20b6b3f4e1f88ce6a2802dabc667b607412ef Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 23 Nov 2015 12:04:52 -0800 Subject: [PATCH 054/110] drivers: net: xgene: fix: ifconfig up/down crash Fixing kernel crash when doing ifconfig down and up in a loop, [ 124.028237] Call trace: [ 124.030670] [] memcpy+0x20/0x180 [ 124.035436] [] skb_clone+0x3c/0xa8 [ 124.040374] [] __skb_tstamp_tx+0xc0/0x118 [ 124.045918] [] skb_tstamp_tx+0x10/0x1c [ 124.051203] [] xgene_enet_start_xmit+0x2e4/0x33c [ 124.057352] [] dev_hard_start_xmit+0x2e8/0x400 [ 124.063327] [] sch_direct_xmit+0x90/0x1d4 [ 124.068870] [] __dev_queue_xmit+0x28c/0x498 [ 124.074585] [] dev_queue_xmit_sk+0x10/0x1c [ 124.080216] [] ip_finish_output2+0x3d0/0x438 [ 124.086017] [] ip_finish_output+0x198/0x1ac [ 124.091732] [] ip_output+0xec/0x164 [ 124.096755] [] ip_local_out_sk+0x38/0x48 [ 124.102211] [] ip_queue_xmit+0x288/0x330 [ 124.107668] [] tcp_transmit_skb+0x908/0x964 [ 124.113383] [] tcp_send_ack+0x128/0x138 [ 124.118753] [] __tcp_ack_snd_check+0x5c/0x94 [ 124.124555] [] tcp_rcv_established+0x554/0x68c [ 124.130530] [] tcp_v4_do_rcv+0xa4/0x37c [ 124.135900] [] release_sock+0xb4/0x150 [ 124.141184] [] tcp_recvmsg+0x448/0x9e0 [ 124.146468] [] inet_recvmsg+0xa0/0xc0 [ 124.151666] [] sock_recvmsg+0x10/0x1c [ 124.156863] [] SyS_recvfrom+0xa4/0xf8 [ 124.162061] Code: f2400c84 540001c0 cb040042 36000064 (38401423) [ 124.168133] ---[ end trace 7ab2550372e8a65b ]--- The fix was to reorder napi_enable, napi_disable, request_irq and free_irq calls, move register_netdev after dma_coerce_mask_and_coherent. Signed-off-by: Iyappan Subramanian Tested-by: Khuong Dinh Signed-off-by: David S. Miller --- .../net/ethernet/apm/xgene/xgene_enet_main.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 991412ce6f48..1adfe7036843 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -688,10 +688,10 @@ static int xgene_enet_open(struct net_device *ndev) mac_ops->tx_enable(pdata); mac_ops->rx_enable(pdata); + xgene_enet_napi_enable(pdata); ret = xgene_enet_register_irq(ndev); if (ret) return ret; - xgene_enet_napi_enable(pdata); if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) phy_start(pdata->phy_dev); @@ -715,13 +715,13 @@ static int xgene_enet_close(struct net_device *ndev) else cancel_delayed_work_sync(&pdata->link_work); - xgene_enet_napi_disable(pdata); - xgene_enet_free_irq(ndev); - xgene_enet_process_ring(pdata->rx_ring, -1); - mac_ops->tx_disable(pdata); mac_ops->rx_disable(pdata); + xgene_enet_free_irq(ndev); + xgene_enet_napi_disable(pdata); + xgene_enet_process_ring(pdata->rx_ring, -1); + return 0; } @@ -1474,30 +1474,33 @@ static int xgene_enet_probe(struct platform_device *pdev) } ndev->hw_features = ndev->features; - ret = register_netdev(ndev); - if (ret) { - netdev_err(ndev, "Failed to register netdev\n"); - goto err; - } - ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { netdev_err(ndev, "No usable DMA configuration\n"); goto err; } + ret = register_netdev(ndev); + if (ret) { + netdev_err(ndev, "Failed to register netdev\n"); + goto err; + } + ret = xgene_enet_init_hw(pdata); if (ret) goto err; - xgene_enet_napi_add(pdata); mac_ops = pdata->mac_ops; - if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) + if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { ret = xgene_enet_mdio_config(pdata); - else + if (ret) + goto err; + } else { INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state); + } - return ret; + xgene_enet_napi_add(pdata); + return 0; err: unregister_netdev(ndev); free_netdev(ndev); From 264640fc2c5f4f913db5c73fa3eb1ead2c45e9d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Tue, 24 Nov 2015 15:07:11 +0100 Subject: [PATCH 055/110] ipv6: distinguish frag queues by device for multicast and link-local packets If a fragmented multicast packet is received on an ethernet device which has an active macvlan on top of it, each fragment is duplicated and received both on the underlying device and the macvlan. If some fragments for macvlan are processed before the whole packet for the underlying device is reassembled, the "overlapping fragments" test in ip6_frag_queue() discards the whole fragment queue. To resolve this, add device ifindex to the search key and require it to match reassembling multicast packets and packets to link-local addresses. Note: similar patch has been already submitted by Yoshifuji Hideaki in http://patchwork.ozlabs.org/patch/220979/ but got lost and forgotten for some reason. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 + net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++-- net/ipv6/reassembly.c | 10 +++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e1a10b0ac0b0..ea5a13ef85a6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -490,6 +490,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + int iif; u8 ecn; }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d5efeb87350e..bab4441ed4e4 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst, u8 ecn) + struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; local_bh_disable(); @@ -601,7 +602,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use fhdr = (struct frag_hdr *)skb_transport_header(clone); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 44e21a03cfc3..45f5ae51de65 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) return fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst); + ipv6_addr_equal(&fq->daddr, arg->dst) && + (arg->iif == fq->iif || + !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))); } EXPORT_SYMBOL(ip6_frag_match); @@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data) static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, u8 ecn) + const struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; hash = inet6_hash_frag(id, src, dst); @@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq) { int ret; From 33c40e242ce681092ab778c238f3fff5a345ee0e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Nov 2015 14:41:59 +0000 Subject: [PATCH 056/110] rxrpc: Correctly handle ack at end of client call transmit phase Normally, the transmit phase of a client call is implicitly ack'd by the reception of the first data packet of the response being received. However, if a security negotiation happens, the transmit phase, if it is entirely contained in a single packet, may get an ack packet in response and then may get aborted due to security negotiation failure. Because the client has shifted state to RXRPC_CALL_CLIENT_AWAIT_REPLY due to having transmitted all the data, the code that handles processing of the received ack packet doesn't note the hard ack the data packet. The following abort packet in the case of security negotiation failure then incurs an assertion failure when it tries to drain the Tx queue because the hard ack state is out of sync (hard ack means the packets have been processed and can be discarded by the sender; a soft ack means that the packets are received but could still be discarded and rerequested by the receiver). To fix this, we should record the hard ack we received for the ack packet. The assertion failure looks like: RxRPC: Assertion failed 1 <= 0 is false 0x1 <= 0x0 is false ------------[ cut here ]------------ kernel BUG at ../net/rxrpc/ar-ack.c:431! ... RIP: 0010:[] [] rxrpc_rotate_tx_window+0xbc/0x131 [af_rxrpc] ... Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-ack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index e0547f521f20..adc555e0323d 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -723,8 +723,10 @@ process_further: if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) + hard > tx) { + call->acks_hard = tx; goto all_acked; + } smp_rmb(); rxrpc_rotate_tx_window(call, hard - 1); From fbdd29bfd2da979b7ac6a0084af56624156c1069 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 24 Nov 2015 17:09:30 +0100 Subject: [PATCH 057/110] net: ipmr, ip6mr: fix vif/tunnel failure race condition Since (at least) commit b17a7c179dd3 ("[NET]: Do sysfs registration as part of register_netdevice."), netdev_run_todo() deals only with unregistration, so we don't need to do the rtnl_unlock/lock cycle to finish registration when failing pimreg or dvmrp device creation. In fact that opens a race condition where someone can delete the device while rtnl is unlocked because it's fully registered. The problem gets worse when netlink support is introduced as there are more points of entry that can cause it and it also makes reusing that code correctly impossible. Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 -------- net/ipv6/ip6mr.c | 4 ---- 2 files changed, 12 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 292123bc30fa..c3a38353f5dc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -441,10 +441,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) return dev; failure: - /* allow the register to be completed before unregistering. */ - rtnl_unlock(); - rtnl_lock(); - unregister_netdevice(dev); return NULL; } @@ -540,10 +536,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return dev; failure: - /* allow the register to be completed before unregistering. */ - rtnl_unlock(); - rtnl_lock(); - unregister_netdevice(dev); return NULL; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7a4a1b81dbb6..a10e77103c88 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -765,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) return dev; failure: - /* allow the register to be completed before unregistering. */ - rtnl_unlock(); - rtnl_lock(); - unregister_netdevice(dev); return NULL; } From 20f795666d3accbb75969730019aeb03f50ef0ec Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 24 Nov 2015 13:51:53 -0500 Subject: [PATCH 058/110] net: openvswitch: Remove invalid comment During pre-upstream development, the openvswitch datapath used a custom hashtable to store vports that could fail on delete due to lack of memory. However, prior to upstream submission, this code was reworked to use an hlist based hastable with flexible-array based buckets. As such the failure condition was eliminated from the vport_del path, rendering this comment invalid. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 0ac0fd004d7e..e194c10a1889 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -256,8 +256,8 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) * * @vport: vport to delete. * - * Detaches @vport from its datapath and destroys it. It is possible to fail - * for reasons such as lack of memory. ovs_mutex must be held. + * Detaches @vport from its datapath and destroys it. ovs_mutex must + * be held. */ void ovs_vport_del(struct vport *vport) { From 8c7188b23474cca017b3ef354c4a58456f68303a Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 24 Nov 2015 17:13:21 -0500 Subject: [PATCH 059/110] RDS: fix race condition when sending a message on unbound socket Sasha's found a NULL pointer dereference in the RDS connection code when sending a message to an apparently unbound socket. The problem is caused by the code checking if the socket is bound in rds_sendmsg(), which checks the rs_bound_addr field without taking a lock on the socket. This opens a race where rs_bound_addr is temporarily set but where the transport is not in rds_bind(), leading to a NULL pointer dereference when trying to dereference 'trans' in __rds_conn_create(). Vegard wrote a reproducer for this issue, so kindly ask him to share if you're interested. I cannot reproduce the NULL pointer dereference using Vegard's reproducer with this patch, whereas I could without. Complete earlier incomplete fix to CVE-2015-6937: 74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection") Cc: David S. Miller Cc: stable@vger.kernel.org Reviewed-by: Vegard Nossum Reviewed-by: Sasha Levin Acked-by: Santosh Shilimkar Signed-off-by: Quentin Casasnovas Signed-off-by: David S. Miller --- net/rds/connection.c | 6 ------ net/rds/send.c | 4 +++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index d4564036a339..e3b118cae81d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -186,12 +186,6 @@ static struct rds_connection *__rds_conn_create(struct net *net, } } - if (trans == NULL) { - kmem_cache_free(rds_conn_slab, conn); - conn = ERR_PTR(-ENODEV); - goto out; - } - conn->c_trans = trans; ret = trans->conn_alloc(conn, gfp); diff --git a/net/rds/send.c b/net/rds/send.c index 827155c2ead1..c9cdb358ea88 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1013,11 +1013,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) release_sock(sk); } - /* racing with another thread binding seems ok here */ + lock_sock(sk); if (daddr == 0 || rs->rs_bound_addr == 0) { + release_sock(sk); ret = -ENOTCONN; /* XXX not a great errno */ goto out; } + release_sock(sk); if (payload_len > rds_sk_sndbuf(rs)) { ret = -EMSGSIZE; From 19cebbcb04c8277bb8a7905957c8af11967c4e28 Mon Sep 17 00:00:00 2001 From: Christoph Biedl Date: Wed, 25 Nov 2015 07:47:40 +0100 Subject: [PATCH 060/110] isdn: Partially revert debug format string usage clean up Commit 35a4a57 ("isdn: clean up debug format string usage") introduced a safeguard to avoid accidential format string interpolation of data when calling debugl1 or HiSax_putstatus. This did however not take into account VHiSax_putstatus (called by HiSax_putstatus) does *not* call vsprintf if the head parameter is NULL - the format string is treated as plain text then instead. As a result, the string "%s" is processed literally, and the actual information is lost. This affects the isdnlog userspace program which stopped logging information since that commit. So revert the HiSax_putstatus invocations to the previous state. Fixes: 35a4a5733b0a ("isdn: clean up debug format string usage") Cc: Kees Cook Cc: Karsten Keil Signed-off-by: Christoph Biedl Signed-off-by: David S. Miller --- drivers/isdn/hisax/config.c | 2 +- drivers/isdn/hisax/hfc_pci.c | 2 +- drivers/isdn/hisax/hfc_sx.c | 2 +- drivers/isdn/hisax/q931.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index b33f53b3ca93..bf04d2a3cf4a 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1896,7 +1896,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if) ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 4a4825528188..90449e1e91e5 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -901,7 +901,7 @@ Begin: ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", total - 3); } diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index b1fad81f0722..13b2151c10f5 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -674,7 +674,7 @@ receive_emsg(struct IsdnCardState *cs) ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", skb->len); } diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c index b420f8bd862e..ba4beb25d872 100644 --- a/drivers/isdn/hisax/q931.c +++ b/drivers/isdn/hisax/q931.c @@ -1179,7 +1179,7 @@ LogFrame(struct IsdnCardState *cs, u_char *buf, int size) dp--; *dp++ = '\n'; *dp = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogFrame: ", "warning Frame too big (%d)", size); } @@ -1246,7 +1246,7 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir) } if (finish) { *dp = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); return; } if ((0xfe & buf[0]) == PROTO_DIS_N0) { /* 1TR6 */ @@ -1509,5 +1509,5 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir) dp += sprintf(dp, "Unknown protocol %x!", buf[0]); } *dp = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } From c9da161c6517ba12154059d3b965c2cbaf16f90f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Nov 2015 21:28:15 +0100 Subject: [PATCH 061/110] bpf: fix clearing on persistent program array maps Currently, when having map file descriptors pointing to program arrays, there's still the issue that we unconditionally flush program array contents via bpf_fd_array_map_clear() in bpf_map_release(). This happens when such a file descriptor is released and is independent of the map's refcount. Having this flush independent of the refcount is for a reason: there can be arbitrary complex dependency chains among tail calls, also circular ones (direct or indirect, nesting limit determined during runtime), and we need to make sure that the map drops all references to eBPF programs it holds, so that the map's refcount can eventually drop to zero and initiate its freeing. Btw, a walk of the whole dependency graph would not be possible for various reasons, one being complexity and another one inconsistency, i.e. new programs can be added to parts of the graph at any time, so there's no guaranteed consistent state for the time of such a walk. Now, the program array pinning itself works, but the issue is that each derived file descriptor on close would nevertheless call unconditionally into bpf_fd_array_map_clear(). Instead, keep track of users and postpone this flush until the last reference to a user is dropped. As this only concerns a subset of references (f.e. a prog array could hold a program that itself has reference on the prog array holding it, etc), we need to track them separately. Short analysis on the refcounting: on map creation time usercnt will be one, so there's no change in behaviour for bpf_map_release(), if unpinned. If we already fail in map_create(), we are immediately freed, and no file descriptor has been made public yet. In bpf_obj_pin_user(), we need to probe for a possible map in bpf_fd_probe_obj() already with a usercnt reference, so before we drop the reference on the fd with fdput(). Therefore, if actual pinning fails, we need to drop that reference again in bpf_any_put(), otherwise we keep holding it. When last reference drops on the inode, the bpf_any_put() in bpf_evict_inode() will take care of dropping the usercnt again. In the bpf_obj_get_user() case, the bpf_any_get() will grab a reference on the usercnt, still at a time when we have the reference on the path. Should we later on fail to grab a new file descriptor, bpf_any_put() will drop it, otherwise we hold it until bpf_map_release() time. Joint work with Alexei. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 ++++- kernel/bpf/inode.c | 6 +++--- kernel/bpf/syscall.c | 36 +++++++++++++++++++++++++----------- kernel/bpf/verifier.c | 3 +-- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index de464e6683b6..83d1926c61e4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_map { struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; + atomic_t usercnt; }; struct bpf_map_type_list { @@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(u32 ufd); +struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); +void bpf_map_inc(struct bpf_map *map, bool uref); +void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index be6d726e31c9..5a8a797d50b7 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type) atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); break; case BPF_TYPE_MAP: - atomic_inc(&((struct bpf_map *)raw)->refcnt); + bpf_map_inc(raw, true); break; default: WARN_ON_ONCE(1); @@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type) bpf_prog_put(raw); break; case BPF_TYPE_MAP: - bpf_map_put(raw); + bpf_map_put_with_uref(raw); break; default: WARN_ON_ONCE(1); @@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) void *raw; *type = BPF_TYPE_MAP; - raw = bpf_map_get(ufd); + raw = bpf_map_get_with_uref(ufd); if (IS_ERR(raw)) { *type = BPF_TYPE_PROG; raw = bpf_prog_get(ufd); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0d3313d02a7e..4a8f3c1d7da6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work) map->ops->map_free(map); } +static void bpf_map_put_uref(struct bpf_map *map) +{ + if (atomic_dec_and_test(&map->usercnt)) { + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) + bpf_fd_array_map_clear(map); + } +} + /* decrement map refcnt and schedule it for freeing via workqueue * (unrelying map implementation ops->map_free() might sleep) */ @@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map) } } +void bpf_map_put_with_uref(struct bpf_map *map) +{ + bpf_map_put_uref(map); + bpf_map_put(map); +} + static int bpf_map_release(struct inode *inode, struct file *filp) { - struct bpf_map *map = filp->private_data; - - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) - /* prog_array stores refcnt-ed bpf_prog pointers - * release them all when user space closes prog_array_fd - */ - bpf_fd_array_map_clear(map); - - bpf_map_put(map); + bpf_map_put_with_uref(filp->private_data); return 0; } @@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr) return PTR_ERR(map); atomic_set(&map->refcnt, 1); + atomic_set(&map->usercnt, 1); err = bpf_map_charge_memlock(map); if (err) @@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -struct bpf_map *bpf_map_get(u32 ufd) +void bpf_map_inc(struct bpf_map *map, bool uref) +{ + atomic_inc(&map->refcnt); + if (uref) + atomic_inc(&map->usercnt); +} + +struct bpf_map *bpf_map_get_with_uref(u32 ufd) { struct fd f = fdget(ufd); struct bpf_map *map; @@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd) if (IS_ERR(map)) return map; - atomic_inc(&map->refcnt); + bpf_map_inc(map, true); fdput(f); return map; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c6073056badf..a7945d10b378 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) * will be used by the valid program until it's unloaded * and all maps are released in free_bpf_prog_info() */ - atomic_inc(&map->refcnt); - + bpf_map_inc(map, false); fdput(f); next_insn: insn++; From 880621c2605b82eb5af91a2c94223df6f5a3fb64 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Nov 2015 17:46:09 +0100 Subject: [PATCH 062/110] packet: Allow packets with only a header (but no payload) Commit 9c7077622dd91 ("packet: make packet_snd fail on len smaller than l2 header") added validation for the packet size in packet_snd. This change enforces that every packet needs a header (with at least hard_header_len bytes) plus a payload with at least one byte. Before this change the payload was optional. This fixes PPPoE connections which do not have a "Service" or "Host-Uniq" configured (which is violating the spec, but is still widely used in real-world setups). Those are currently failing with the following message: "pppd: packet size is too short (24 <= 24)" Signed-off-by: Martin Blumenstingl Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- net/packet/af_packet.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..3b5d134e945a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,7 +1398,8 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length + * @hard_header_len: Hardware header length, which means that this is the + * minimum size of a packet. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1cf928fb573e..992396aa635c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2329,8 +2329,8 @@ static void tpacket_destruct_skb(struct sk_buff *skb) static bool ll_header_truncated(const struct net_device *dev, int len) { /* net device doesn't like empty head */ - if (unlikely(len <= dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n", + if (unlikely(len < dev->hard_header_len)) { + net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", current->comm, len, dev->hard_header_len); return true; } From 9ffad80a9c65d7c2ab5ad6cb8b4b0559b9ed8b8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Nov 2015 09:02:10 -0800 Subject: [PATCH 063/110] drivers: net: xgene: fix possible use after free Once TX has been enabled on a NIC, it is illegal to access skb, as this skb might have been freed by another cpu, from TX completion handler. Signed-off-by: Eric Dumazet Reported-by: Mark Rutland Tested-by: Mark Rutland Cc: Iyappan Subramanian Acked-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 1adfe7036843..9147a0107c44 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -450,12 +450,12 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - pdata->ring_ops->wr_cmd(tx_ring, count); skb_tx_timestamp(skb); pdata->stats.tx_packets++; pdata->stats.tx_bytes += skb->len; + pdata->ring_ops->wr_cmd(tx_ring, count); return NETDEV_TX_OK; } From ac316c783d5bef4240db3de000c1bc74481df88e Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 26 Nov 2015 08:35:41 +0100 Subject: [PATCH 064/110] stmmac: fix a filter problem after resuming. When resume the HW is re-configured but some settings can be lost. For example, the MAC Address_X High/Low Registers used for VLAN tagging.. So, while resuming, the set_filter callback needs to be invoked to re-program perfect and hash-table registers. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 64d8aa4e0cad..e3d96f2cc4f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3102,6 +3102,7 @@ int stmmac_resume(struct net_device *ndev) init_dma_desc_rings(ndev, GFP_ATOMIC); stmmac_hw_setup(ndev, false); stmmac_init_tx_coalesce(priv); + stmmac_set_rx_mode(ndev); napi_enable(&priv->napi); From 61adcc03bd010a494664dc46049dc8da245bc277 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 26 Nov 2015 08:35:42 +0100 Subject: [PATCH 065/110] stmmac: fix csr clock divisor for 300MHz This patch is to fix the csr clock in case of 300MHz is provided. Reported-by: Kent Borg Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e3d96f2cc4f8..6256b32cec37 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -185,7 +185,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } } From 22407e13172e9d5257ad3548a4f69bff8ed20111 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 26 Nov 2015 08:35:43 +0100 Subject: [PATCH 066/110] stmmac: dwmac-sti: fix st,tx-retime-src check In case of the st,tx-retime-src is missing from device-tree (it's an optional field) the driver will invoke the strcasecmp to check which clock has been selected and this is a bug; the else condition is needed. In the dwmac_setup, the "rs" variable, passed to the strcasecmp, was not initialized and the compiler, depending on the options adopted, could take it in some different part of the stack generating the hang in such configuration. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 7f6f4a4fcc70..58c05acc2aab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -299,16 +299,17 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) { const char *rs; + dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; + err = of_property_read_string(np, "st,tx-retime-src", &rs); if (err < 0) { dev_warn(dev, "Use internal clock source\n"); - dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; - } else if (!strcasecmp(rs, "clk_125")) { - dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; - } else if (!strcasecmp(rs, "txclk")) { - dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; + } else { + if (!strcasecmp(rs, "clk_125")) + dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; + else if (!strcasecmp(rs, "txclk")) + dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; } - dwmac->speed = SPEED_1000; } From ae26c1c6cb9bd5ad6fa1dbfdf1fe430ac09b0d28 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 26 Nov 2015 08:35:44 +0100 Subject: [PATCH 067/110] stmmac: fix PHY reset during resume When stmmac_mdio_reset, was called from stmmac_resume, it was not resetting the PHY due to which MAC was not getting reset properly and hence ethernet interface not was resumed properly. The issue was currently only reproducible on stih301-b2204. Signed-off-by: Pankaj Dev Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index ebf6abc4853f..bba670c42e37 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -138,7 +138,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) #ifdef CONFIG_OF if (priv->device->of_node) { - int reset_gpio, active_low; if (data->reset_gpio < 0) { struct device_node *np = priv->device->of_node; @@ -154,24 +153,23 @@ int stmmac_mdio_reset(struct mii_bus *bus) "snps,reset-active-low"); of_property_read_u32_array(np, "snps,reset-delays-us", data->delays, 3); + + if (gpio_request(data->reset_gpio, "mdio-reset")) + return 0; } - reset_gpio = data->reset_gpio; - active_low = data->active_low; + gpio_direction_output(data->reset_gpio, + data->active_low ? 1 : 0); + if (data->delays[0]) + msleep(DIV_ROUND_UP(data->delays[0], 1000)); - if (!gpio_request(reset_gpio, "mdio-reset")) { - gpio_direction_output(reset_gpio, active_low ? 1 : 0); - if (data->delays[0]) - msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1); + if (data->delays[1]) + msleep(DIV_ROUND_UP(data->delays[1], 1000)); - gpio_set_value(reset_gpio, active_low ? 0 : 1); - if (data->delays[1]) - msleep(DIV_ROUND_UP(data->delays[1], 1000)); - - gpio_set_value(reset_gpio, active_low ? 1 : 0); - if (data->delays[2]) - msleep(DIV_ROUND_UP(data->delays[2], 1000)); - } + gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0); + if (data->delays[2]) + msleep(DIV_ROUND_UP(data->delays[2], 1000)); } #endif From e527c4a769d375ac0472450c52bde29087f49cd9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 26 Nov 2015 08:35:45 +0100 Subject: [PATCH 068/110] stmmac: fix oversized frame reception The receive skb buffers can be preallocated when the link is opened according to mtu size. While testing on a network environment with not standard MTU (e.g. 3000), a panic occurred if an incoming packet had a length greater than rx skb buffer size. This is because the HW is programmed to copy, from the DMA, an Jumbo frame and the Sw must check if the allocated buffer is enough to store the frame. Signed-off-by: Alexandre TORGUE Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6256b32cec37..3c6549aee11d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2232,6 +2232,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) frame_len = priv->hw->desc->get_rx_frame_len(p, coe); + /* check if frame_len fits the preallocated memory */ + if (frame_len > priv->dma_buf_sz) { + priv->dev->stats.rx_length_errors++; + break; + } + /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 * Type frames (LLC/LLC-SNAP) */ From 9490f886b192964796285907d777ff00fba1fa0f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 26 Nov 2015 12:08:18 +0100 Subject: [PATCH 069/110] af-unix: passcred support for sendpage sendpage did not care about credentials at all. This could lead to situations in which because of fd passing between processes we could append data to skbs with different scm data. It is illegal to splice those skbs together. Instead we have to allocate a new skb and if requested fill out the scm details. Fixes: 869e7c62486ec ("net: af_unix: implement stream sendpage support") Reported-by: Al Viro Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/unix/af_unix.c | 79 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4e95bdf973d9..6ced74690eee 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1551,6 +1551,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } +static bool unix_passcred_enabled(const struct socket *sock, + const struct sock *other) +{ + return test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags); +} + /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket @@ -1561,14 +1569,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, { if (UNIXCB(skb).pid) return; - if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + if (unix_passcred_enabled(sock, other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } +static int maybe_init_creds(struct scm_cookie *scm, + struct socket *socket, + const struct sock *other) +{ + int err; + struct msghdr msg = { .msg_controllen = 0 }; + + err = scm_send(socket, &msg, scm, false); + if (err) + return err; + + if (unix_passcred_enabled(socket, other)) { + scm->pid = get_pid(task_tgid(current)); + current_uid_gid(&scm->creds.uid, &scm->creds.gid); + } + return err; +} + +static bool unix_skb_scm_eq(struct sk_buff *skb, + struct scm_cookie *scm) +{ + const struct unix_skb_parms *u = &UNIXCB(skb); + + return u->pid == scm->pid && + uid_eq(u->uid, scm->creds.uid) && + gid_eq(u->gid, scm->creds.gid) && + unix_secdata_eq(scm, skb); +} + /* * Send AF_UNIX data. */ @@ -1884,8 +1919,10 @@ out_err: static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, int offset, size_t size, int flags) { - int err = 0; - bool send_sigpipe = true; + int err; + bool send_sigpipe = false; + bool init_scm = true; + struct scm_cookie scm; struct sock *other, *sk = socket->sk; struct sk_buff *skb, *newskb = NULL, *tail = NULL; @@ -1903,7 +1940,7 @@ alloc_skb: newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, &err, 0); if (!newskb) - return err; + goto err; } /* we must acquire readlock as we modify already present @@ -1912,12 +1949,12 @@ alloc_skb: err = mutex_lock_interruptible(&unix_sk(other)->readlock); if (err) { err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; - send_sigpipe = false; goto err; } if (sk->sk_shutdown & SEND_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_unlock; } @@ -1926,17 +1963,27 @@ alloc_skb: if (sock_flag(other, SOCK_DEAD) || other->sk_shutdown & RCV_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_state_unlock; } + if (init_scm) { + err = maybe_init_creds(&scm, socket, other); + if (err) + goto err_state_unlock; + init_scm = false; + } + skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; - } else if (!skb) { - if (newskb) + } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { + if (newskb) { skb = newskb; - else + } else { + tail = skb; goto alloc_skb; + } } else if (newskb) { /* this is fast path, we don't necessarily need to * call to kfree_skb even though with newskb == NULL @@ -1957,6 +2004,9 @@ alloc_skb: atomic_add(size, &sk->sk_wmem_alloc); if (newskb) { + err = unix_scm_to_skb(&scm, skb, false); + if (err) + goto err_state_unlock; spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, newskb); spin_unlock(&other->sk_receive_queue.lock); @@ -1966,7 +2016,7 @@ alloc_skb: mutex_unlock(&unix_sk(other)->readlock); other->sk_data_ready(other); - + scm_destroy(&scm); return size; err_state_unlock: @@ -1977,6 +2027,8 @@ err: kfree_skb(newskb); if (send_sigpipe && !(flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); + if (!init_scm) + scm_destroy(&scm); return err; } @@ -2280,10 +2332,7 @@ unlock: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != scm.pid) || - !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || - !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || - !unix_secdata_eq(&scm, skb)) + if (!unix_skb_scm_eq(skb, &scm)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ From fea0f6650979a4fddd3f3fe255563ed15a2fc318 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Nov 2015 11:59:45 +0000 Subject: [PATCH 070/110] net: fsl: Don't use NO_IRQ to check return value of irq_of_parse_and_map() This driver can be built on arm64 but relies on NO_IRQ to check the return value of irq_of_parse_and_map() which fails to build on arm64 because the architecture does not provide a NO_IRQ. Fix this to correctly check the return value of irq_of_parse_and_map(). Even on ARM systems where the driver was previously used the check was broken since on ARM NO_IRQ is -1 but irq_of_parse_and_map() returns 0 on error. Signed-off-by: Mark Brown Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3e6b9b437497..7cf898455e60 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -647,9 +647,9 @@ static int gfar_parse_group(struct device_node *np, if (model && strcasecmp(model, "FEC")) { gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1); gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2); - if (gfar_irq(grp, TX)->irq == NO_IRQ || - gfar_irq(grp, RX)->irq == NO_IRQ || - gfar_irq(grp, ER)->irq == NO_IRQ) + if (!gfar_irq(grp, TX)->irq || + !gfar_irq(grp, RX)->irq || + !gfar_irq(grp, ER)->irq) return -EINVAL; } From 0f2c0d32e6536ad39c3e9589d42c53d0ee3bfa08 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Nov 2015 11:59:46 +0000 Subject: [PATCH 071/110] net: fsl: Fix error checking for platform_get_irq() The gianfar driver has recently been enabled on arm64 but fails to build since it check the return value of platform_get_irq() against NO_IRQ. Fix this by instead checking for a negative error code. Even on ARM where this code was previously being built this check was incorrect since platform_get_irq() returns a negative error code which may not be exactly the (unsigned int)(-1) that NO_IRQ is defined to be. Signed-off-by: Mark Brown Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 664d0c261269..b40fba929d65 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -467,7 +467,7 @@ static int gianfar_ptp_probe(struct platform_device *dev) etsects->irq = platform_get_irq(dev, 0); - if (etsects->irq == NO_IRQ) { + if (etsects->irq < 0) { pr_err("irq not in device tree\n"); goto no_node; } From 142a2e7ece8d8ac0e818eb2c91f99ca894730e2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Nov 2015 08:18:14 -0800 Subject: [PATCH 072/110] tcp: initialize tp->copied_seq in case of cross SYN connection Dmitry provided a syzkaller (http://github.com/google/syzkaller) generated program that triggers the WARNING at net/ipv4/tcp.c:1729 in tcp_recvmsg() : WARN_ON(tp->copied_seq != tp->rcv_nxt && !(flags & (MSG_PEEK | MSG_TRUNC))); His program is specifically attempting a Cross SYN TCP exchange, that we support (for the pleasure of hackers ?), but it looks we lack proper tcp->copied_seq initialization. Thanks again Dmitry for your report and testings. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a4a0b6b3bcf2..2d656eef7f8e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5683,6 +5683,7 @@ discard: } tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tp->copied_seq = tp->rcv_nxt; tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is From 304d888b29cf96f1dd53511ee686499cd8cdf249 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 27 Nov 2015 18:17:05 +0100 Subject: [PATCH 073/110] Revert "ipv6: ndisc: inherit metadata dst when creating ndisc requests" This reverts commit ab450605b35caa768ca33e86db9403229bf42be4. In IPv6, we cannot inherit the dst of the original dst. ndisc packets are IPv6 packets and may take another route than the original packet. This patch breaks the following scenario: a packet comes from eth0 and is forwarded through vxlan1. The encapsulated packet triggers an NS which cannot be sent because of the wrong route. CC: Jiri Benc CC: Thomas Graf Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ndisc.h | 3 +-- net/ipv6/addrconf.c | 2 +- net/ipv6/ndisc.c | 10 +++------- net/ipv6/route.c | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index bf3937431030..2d8edaad29cb 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -181,8 +181,7 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb); + const struct in6_addr *daddr, const struct in6_addr *saddr); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d84742f003a9..61f26851655c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3642,7 +3642,7 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL); + ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); rtnl_unlock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3e0f855e1bea..d6161e1c48c8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -556,8 +556,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) } void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb) + const struct in6_addr *daddr, const struct in6_addr *saddr) { struct sk_buff *skb; struct in6_addr addr_buf; @@ -593,9 +592,6 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr); - if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb) - skb_dst_copy(skb, oskb); - ndisc_send_skb(skb, daddr, saddr); } @@ -682,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, target, target, saddr, skb); + ndisc_send_ns(dev, target, target, saddr); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, target, &mcaddr, saddr, skb); + ndisc_send_ns(dev, target, &mcaddr, saddr); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f01fe122abd..826e6aa44f8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -523,7 +523,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL); + ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL); dev_put(work->dev); kfree(work); } From ee9159ddce14bc1dec9435ae4e3bd3153e783706 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 27 Nov 2015 14:18:39 -0500 Subject: [PATCH 074/110] wan/x25: Fix use-after-free in x25_asy_open_tty() The N_X25 line discipline may access the previous line discipline's closed and already-freed private data on open [1]. The tty->disc_data field _never_ refers to valid data on entry to the line discipline's open() method. Rather, the ldisc is expected to initialize that field for its own use for the lifetime of the instance (ie. from open() to close() only). [1] [ 634.336761] ================================================================== [ 634.338226] BUG: KASAN: use-after-free in x25_asy_open_tty+0x13d/0x490 at addr ffff8800a743efd0 [ 634.339558] Read of size 4 by task syzkaller_execu/8981 [ 634.340359] ============================================================================= [ 634.341598] BUG kmalloc-512 (Not tainted): kasan: bad access detected ... [ 634.405018] Call Trace: [ 634.405277] dump_stack (lib/dump_stack.c:52) [ 634.405775] print_trailer (mm/slub.c:655) [ 634.406361] object_err (mm/slub.c:662) [ 634.406824] kasan_report_error (mm/kasan/report.c:138 mm/kasan/report.c:236) [ 634.409581] __asan_report_load4_noabort (mm/kasan/report.c:279) [ 634.411355] x25_asy_open_tty (drivers/net/wan/x25_asy.c:559 (discriminator 1)) [ 634.413997] tty_ldisc_open.isra.2 (drivers/tty/tty_ldisc.c:447) [ 634.414549] tty_set_ldisc (drivers/tty/tty_ldisc.c:567) [ 634.415057] tty_ioctl (drivers/tty/tty_io.c:2646 drivers/tty/tty_io.c:2879) [ 634.423524] do_vfs_ioctl (fs/ioctl.c:43 fs/ioctl.c:607) [ 634.427491] SyS_ioctl (fs/ioctl.c:622 fs/ioctl.c:613) [ 634.427945] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:188) Reported-and-tested-by: Sasha Levin Cc: Signed-off-by: Peter Hurley Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 5c47b011a9d7..cd39025d2abf 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -549,16 +549,12 @@ static void x25_asy_receive_buf(struct tty_struct *tty, static int x25_asy_open_tty(struct tty_struct *tty) { - struct x25_asy *sl = tty->disc_data; + struct x25_asy *sl; int err; if (tty->ops->write == NULL) return -EOPNOTSUPP; - /* First make sure we're not already connected. */ - if (sl && sl->magic == X25_ASY_MAGIC) - return -EEXIST; - /* OK. Find a free X.25 channel to use. */ sl = x25_asy_alloc(); if (sl == NULL) From 5738a09d58d5ad2871f1f9a42bf6a3aa9ece5b3c Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 28 Nov 2015 01:29:30 +0300 Subject: [PATCH 075/110] vmxnet3: fix checks for dma mapping errors vmxnet3_drv does not check dma_addr with dma_mapping_error() after mapping dma memory. The patch adds the checks and tries to handle failures. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 71 ++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 899ea4288197..417903715437 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -587,6 +587,12 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, &adapter->pdev->dev, rbi->skb->data, rbi->len, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + rbi->dma_addr)) { + dev_kfree_skb_any(rbi->skb); + rq->stats.rx_buf_alloc_failure++; + break; + } } else { /* rx buffer skipped by the device */ } @@ -605,13 +611,18 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, &adapter->pdev->dev, rbi->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + rbi->dma_addr)) { + put_page(rbi->page); + rq->stats.rx_buf_alloc_failure++; + break; + } } else { /* rx buffers skipped by the device */ } val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT; } - BUG_ON(rbi->dma_addr == 0); gd->rxd.addr = cpu_to_le64(rbi->dma_addr); gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT) | val | rbi->len); @@ -655,7 +666,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, } -static void +static int vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, struct vmxnet3_tx_queue *tq, struct pci_dev *pdev, struct vmxnet3_adapter *adapter) @@ -715,6 +726,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, skb->data + buf_offset, buf_size, PCI_DMA_TODEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; tbi->len = buf_size; @@ -755,6 +768,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag, buf_offset, buf_size, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; tbi->len = buf_size; @@ -782,6 +797,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, /* set the last buf_info for the pkt */ tbi->skb = skb; tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base; + + return 0; } @@ -1020,7 +1037,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } /* fill tx descs related to addr & len */ - vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter); + if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter)) + goto unlock_drop_pkt; /* setup the EOP desc */ ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP); @@ -1231,6 +1249,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_rx_buf_info *rbi; struct sk_buff *skb, *new_skb = NULL; struct page *new_page = NULL; + dma_addr_t new_dma_addr; int num_to_alloc; struct Vmxnet3_RxDesc *rxd; u32 idx, ring_idx; @@ -1287,6 +1306,21 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skip_page_frags = true; goto rcd_done; } + new_dma_addr = dma_map_single(&adapter->pdev->dev, + new_skb->data, rbi->len, + PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + new_dma_addr)) { + dev_kfree_skb(new_skb); + /* Skb allocation failed, do not handover this + * skb to stack. Reuse it. Drop the existing pkt + */ + rq->stats.rx_buf_alloc_failure++; + ctx->skb = NULL; + rq->stats.drop_total++; + skip_page_frags = true; + goto rcd_done; + } dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr, rbi->len, @@ -1303,9 +1337,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, /* Immediate refill */ rbi->skb = new_skb; - rbi->dma_addr = dma_map_single(&adapter->pdev->dev, - rbi->skb->data, rbi->len, - PCI_DMA_FROMDEVICE); + rbi->dma_addr = new_dma_addr; rxd->addr = cpu_to_le64(rbi->dma_addr); rxd->len = rbi->len; if (adapter->version == 2 && @@ -1348,6 +1380,19 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skip_page_frags = true; goto rcd_done; } + new_dma_addr = dma_map_page(&adapter->pdev->dev + , rbi->page, + 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + new_dma_addr)) { + put_page(new_page); + rq->stats.rx_buf_alloc_failure++; + dev_kfree_skb(ctx->skb); + ctx->skb = NULL; + skip_page_frags = true; + goto rcd_done; + } dma_unmap_page(&adapter->pdev->dev, rbi->dma_addr, rbi->len, @@ -1357,10 +1402,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, /* Immediate refill */ rbi->page = new_page; - rbi->dma_addr = dma_map_page(&adapter->pdev->dev - , rbi->page, - 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + rbi->dma_addr = new_dma_addr; rxd->addr = cpu_to_le64(rbi->dma_addr); rxd->len = rbi->len; } @@ -2167,7 +2209,8 @@ vmxnet3_set_mc(struct net_device *netdev) PCI_DMA_TODEVICE); } - if (new_table_pa) { + if (!dma_mapping_error(&adapter->pdev->dev, + new_table_pa)) { new_mode |= VMXNET3_RXM_MCAST; rxConf->mfTablePA = cpu_to_le64(new_table_pa); } else { @@ -3075,6 +3118,11 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); + if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { + dev_err(&pdev->dev, "Failed to map dma\n"); + err = -EFAULT; + goto err_dma_map; + } adapter->shared = dma_alloc_coherent( &adapter->pdev->dev, sizeof(struct Vmxnet3_DriverShared), @@ -3233,6 +3281,7 @@ err_alloc_queue_desc: err_alloc_shared: dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); +err_dma_map: free_netdev(netdev); return err; } From 9cd3e072b0be17446e37d7414eac8a3499e0601e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:10 -0800 Subject: [PATCH 076/110] net: rename SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA This patch is a cleanup to make following patch easier to review. Goal is to move SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA from (struct socket)->flags to a (struct socket_wq)->flags to benefit from RCU protection in sock_wake_async() To ease backports, we rename both constants. Two new helpers, sk_set_bit(int nr, struct sock *sk) and sk_clear_bit(int net, struct sock *sk) are added so that following patch can change their implementation. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- crypto/algif_aead.c | 4 ++-- crypto/algif_skcipher.c | 6 +++--- drivers/net/macvtap.c | 4 ++-- drivers/net/tun.c | 4 ++-- fs/dlm/lowcomms.c | 4 ++-- include/linux/net.h | 6 +++--- include/net/sock.h | 10 ++++++++++ net/bluetooth/af_bluetooth.c | 6 +++--- net/caif/caif_socket.c | 4 ++-- net/core/datagram.c | 2 +- net/core/sock.c | 8 ++++---- net/core/stream.c | 4 ++-- net/dccp/proto.c | 3 +-- net/decnet/af_decnet.c | 8 ++++---- net/ipv4/tcp.c | 7 +++---- net/iucv/af_iucv.c | 2 +- net/nfc/llcp_sock.c | 2 +- net/rxrpc/ar-output.c | 2 +- net/sctp/socket.c | 2 +- net/socket.c | 4 ++-- net/sunrpc/xprtsock.c | 14 +++++++------- net/unix/af_unix.c | 6 +++--- 22 files changed, 60 insertions(+), 52 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 0aa6fdfb448a..6d4d4569447e 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -125,7 +125,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags) if (flags & MSG_DONTWAIT) return -EAGAIN; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); for (;;) { if (signal_pending(current)) @@ -139,7 +139,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags) } finish_wait(sk_sleep(sk), &wait); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); return err; } diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0ee4057..ca9efe17db1a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -212,7 +212,7 @@ static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) if (flags & MSG_DONTWAIT) return -EAGAIN; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (signal_pending(current)) @@ -258,7 +258,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) return -EAGAIN; } - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); for (;;) { if (signal_pending(current)) @@ -272,7 +272,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) } finish_wait(sk_sleep(sk), &wait); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); return err; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 54036ae0a388..0fc521941c71 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -498,7 +498,7 @@ static void macvtap_sock_write_space(struct sock *sk) wait_queue_head_t *wqueue; if (!sock_writeable(sk) || - !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); @@ -585,7 +585,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(&q->sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b1878faea397..f0db770e8b2f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1040,7 +1040,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && sock_writeable(sk))) mask |= POLLOUT | POLLWRNORM; @@ -1488,7 +1488,7 @@ static void tun_sock_write_space(struct sock *sk) if (!sock_writeable(sk)) return; - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 87e9d796cf7d..3a37bd3f9637 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -421,7 +421,7 @@ static void lowcomms_write_space(struct sock *sk) if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { con->sock->sk->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags); } if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) @@ -1448,7 +1448,7 @@ static void send_to_sock(struct connection *con) msg_flags); if (ret == -EAGAIN || ret == 0) { if (ret == -EAGAIN && - test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && + test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { /* Notify TCP that we're limited by the * application window size. diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..f514e4dd5521 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,8 +34,8 @@ struct inode; struct file; struct net; -#define SOCK_ASYNC_NOSPACE 0 -#define SOCK_ASYNC_WAITDATA 1 +#define SOCKWQ_ASYNC_NOSPACE 0 +#define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 @@ -96,7 +96,7 @@ struct socket_wq { * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) - * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..c155d09d8af4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2005,6 +2005,16 @@ static inline unsigned long sock_wspace(struct sock *sk) return amt; } +static inline void sk_set_bit(int nr, struct sock *sk) +{ + set_bit(nr, &sk->sk_socket->flags); +} + +static inline void sk_clear_bit(int nr, struct sock *sk) +{ + clear_bit(nr, &sk->sk_socket->flags); +} + static inline void sk_wake_async(struct sock *sk, int how, int band) { if (sock_flag(sk, SOCK_FASYNC)) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a3bffd1ec2b4..70306cc9d814 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -271,11 +271,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) if (signal_pending(current) || !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } __set_current_state(TASK_RUNNING); @@ -441,7 +441,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cc858919108e..aa209b1066c9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); @@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) if (sock_flag(sk, SOCK_DEAD)) break; - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); diff --git a/net/core/datagram.c b/net/core/datagram.c index 617088aee21d..d62af69ad844 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54bfb5a..9d79569935a3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1815,7 +1815,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (!timeo) break; @@ -1861,7 +1861,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) break; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; if (!timeo) @@ -2048,9 +2048,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); return rc; } diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c889..43309428644d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) } if (signal_pending(current)) goto do_interrupted; - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk_stream_memory_free(sk) && !vm_wait) break; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b5cf13a28009..41e65804ddf5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 675cf94e04f8..eebf5ac8ce18 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1747,9 +1747,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); } @@ -2004,10 +2004,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c1728771cf89..c82cca18c90f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -517,8 +517,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after @@ -906,7 +905,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, goto out_err; } - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); copied = 0; @@ -1134,7 +1133,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } /* This should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752419c6..435608c4306d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1483,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, if (sock_writeable(sk) && iucv_below_msglim(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b7de0da46acd..ecf0a0196f18 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); pr_debug("mask 0x%x\n", mask); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index a40d3afe93b7..14c4e12c47b0 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); /* this should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..2353985d689c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6458,7 +6458,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sctp_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* * Since the socket is not locked, the buffer * might be made available after the writeable check and diff --git a/net/socket.c b/net/socket.c index dd2c247c99e3..16be908205fc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1072,11 +1072,11 @@ int sock_wake_async(struct socket *sock, int how, int band) } switch (how) { case SOCK_WAKE_WAITD: - if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) + if (test_bit(SOCKWQ_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; case SOCK_WAKE_SPACE: - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags)) break; /* fall through */ case SOCK_WAKE_IO: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1d1a70498910..2ffaf6a79499 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -398,7 +398,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (unlikely(!sock)) return -ENOTSOCK; - clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags); if (base != 0) { addr = NULL; addrlen = 0; @@ -442,7 +442,7 @@ static void xs_nospace_callback(struct rpc_task *task) struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); transport->inet->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } /** @@ -467,7 +467,7 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { /* * Notify TCP that we're limited by the application * window size @@ -478,7 +478,7 @@ static int xs_nospace(struct rpc_task *task) xprt_wait_for_buffer_space(task, xs_nospace_callback); } } else { - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); ret = -ENOTCONN; } @@ -626,7 +626,7 @@ process_status: case -EPERM: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } return status; @@ -715,7 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EADDRINUSE: case -ENOBUFS: case -EPIPE: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } return status; @@ -1618,7 +1618,7 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; - if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) + if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) return; xprt_write_space(xprt); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6ced74690eee..45aebd966978 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2191,7 +2191,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); @@ -2199,7 +2199,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, if (sock_flag(sk, SOCK_DEAD)) break; - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); @@ -2683,7 +2683,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, if (writable) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } From ceb5d58b217098a657f3850b7a2640f995032e62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:11 -0800 Subject: [PATCH 077/110] net: fix sock_wake_async() rcu protection Dmitry provided a syzkaller (http://github.com/google/syzkaller) triggering a fault in sock_wake_async() when async IO is requested. Said program stressed af_unix sockets, but the issue is generic and should be addressed in core networking stack. The problem is that by the time sock_wake_async() is called, we should not access the @flags field of 'struct socket', as the inode containing this socket might be freed without further notice, and without RCU grace period. We already maintain an RCU protected structure, "struct socket_wq" so moving SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA into it is the safe route. It also reduces number of cache lines needing dirtying, so might provide a performance improvement anyway. In followup patches, we might move remaining flags (SOCK_NOSPACE, SOCK_PASSCRED, SOCK_PASSSEC) to save 8 bytes and let 'struct socket' being mostly read and let it being shared between cpus. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/net.h | 7 ++++++- include/net/sock.h | 23 ++++++++++++++++------- net/core/stream.c | 2 +- net/sctp/socket.c | 24 ++++++++++++++---------- net/socket.c | 21 +++++++-------------- 5 files changed, 44 insertions(+), 33 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index f514e4dd5521..0b4ac7da583a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,6 +34,10 @@ struct inode; struct file; struct net; +/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located + * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. + * Eventually all flags will be in sk->sk_wq_flags. + */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 @@ -89,6 +93,7 @@ struct socket_wq { /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; + unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */ struct rcu_head rcu; } ____cacheline_aligned_in_smp; @@ -202,7 +207,7 @@ enum { SOCK_WAKE_URG, }; -int sock_wake_async(struct socket *sk, int how, int band); +int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, diff --git a/include/net/sock.h b/include/net/sock.h index c155d09d8af4..0434138c5f95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -384,8 +384,10 @@ struct sock { int sk_rcvbuf; struct sk_filter __rcu *sk_filter; - struct socket_wq __rcu *sk_wq; - + union { + struct socket_wq __rcu *sk_wq; + struct socket_wq *sk_wq_raw; + }; #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif @@ -2005,20 +2007,27 @@ static inline unsigned long sock_wspace(struct sock *sk) return amt; } +/* Note: + * We use sk->sk_wq_raw, from contexts knowing this + * pointer is not NULL and cannot disappear/change. + */ static inline void sk_set_bit(int nr, struct sock *sk) { - set_bit(nr, &sk->sk_socket->flags); + set_bit(nr, &sk->sk_wq_raw->flags); } static inline void sk_clear_bit(int nr, struct sock *sk) { - clear_bit(nr, &sk->sk_socket->flags); + clear_bit(nr, &sk->sk_wq_raw->flags); } -static inline void sk_wake_async(struct sock *sk, int how, int band) +static inline void sk_wake_async(const struct sock *sk, int how, int band) { - if (sock_flag(sk, SOCK_FASYNC)) - sock_wake_async(sk->sk_socket, how, band); + if (sock_flag(sk, SOCK_FASYNC)) { + rcu_read_lock(); + sock_wake_async(rcu_dereference(sk->sk_wq), how, band); + rcu_read_unlock(); + } } /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might diff --git a/net/core/stream.c b/net/core/stream.c index 43309428644d..b96f7a79e544 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2353985d689c..5e35ef34008b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6801,26 +6801,30 @@ no_packet: static void __sctp_write_space(struct sctp_association *asoc) { struct sock *sk = asoc->base.sk; - struct socket *sock = sk->sk_socket; - if ((sctp_wspace(asoc) > 0) && sock) { - if (waitqueue_active(&asoc->wait)) - wake_up_interruptible(&asoc->wait); + if (sctp_wspace(asoc) <= 0) + return; - if (sctp_writeable(sk)) { - wait_queue_head_t *wq = sk_sleep(sk); + if (waitqueue_active(&asoc->wait)) + wake_up_interruptible(&asoc->wait); - if (wq && waitqueue_active(wq)) - wake_up_interruptible(wq); + if (sctp_writeable(sk)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq) { + if (waitqueue_active(&wq->wait)) + wake_up_interruptible(&wq->wait); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, - SOCK_WAKE_SPACE, POLL_OUT); + sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); } + rcu_read_unlock(); } } diff --git a/net/socket.c b/net/socket.c index 16be908205fc..456fadb3d819 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1056,27 +1056,20 @@ static int sock_fasync(int fd, struct file *filp, int on) return 0; } -/* This function may be called only under socket lock or callback_lock or rcu_lock */ +/* This function may be called only under rcu_lock */ -int sock_wake_async(struct socket *sock, int how, int band) +int sock_wake_async(struct socket_wq *wq, int how, int band) { - struct socket_wq *wq; + if (!wq || !wq->fasync_list) + return -1; - if (!sock) - return -1; - rcu_read_lock(); - wq = rcu_dereference(sock->wq); - if (!wq || !wq->fasync_list) { - rcu_read_unlock(); - return -1; - } switch (how) { case SOCK_WAKE_WAITD: - if (test_bit(SOCKWQ_ASYNC_WAITDATA, &sock->flags)) + if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags)) break; goto call_kill; case SOCK_WAKE_SPACE: - if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; /* fall through */ case SOCK_WAKE_IO: @@ -1086,7 +1079,7 @@ call_kill: case SOCK_WAKE_URG: kill_fasync(&wq->fasync_list, SIGURG, band); } - rcu_read_unlock(); + return 0; } EXPORT_SYMBOL(sock_wake_async); From fbca9d2d35c6ef1b323fae75cc9545005ba25097 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 30 Nov 2015 13:02:56 +0100 Subject: [PATCH 078/110] bpf, array: fix heap out-of-bounds access when updating elements During own review but also reported by Dmitry's syzkaller [1] it has been noticed that we trigger a heap out-of-bounds access on eBPF array maps when updating elements. This happens with each map whose map->value_size (specified during map creation time) is not multiple of 8 bytes. In array_map_alloc(), elem_size is round_up(attr->value_size, 8) and used to align array map slots for faster access. However, in function array_map_update_elem(), we update the element as ... memcpy(array->value + array->elem_size * index, value, array->elem_size); ... where we access 'value' out-of-bounds, since it was allocated from map_update_elem() from syscall side as kmalloc(map->value_size, GFP_USER) and later on copied through copy_from_user(value, uvalue, map->value_size). Thus, up to 7 bytes, we can access out-of-bounds. Same could happen from within an eBPF program, where in worst case we access beyond an eBPF program's designated stack. Since 1be7f75d1668 ("bpf: enable non-root eBPF programs") didn't hit an official release yet, it only affects priviledged users. In case of array_map_lookup_elem(), the verifier prevents eBPF programs from accessing beyond map->value_size through check_map_access(). Also from syscall side map_lookup_elem() only copies map->value_size back to user, so nothing could leak. [1] http://github.com/google/syzkaller Fixes: 28fbcfa08d8e ("bpf: add array type of eBPF maps") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3f4c99e06c6b..4c67ce39732e 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -105,7 +105,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, array->elem_size); + memcpy(array->value + array->elem_size * index, value, map->value_size); return 0; } From 83e4bf7a7486532df2dc3db27e0e07a250990ed2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 30 Nov 2015 12:31:43 +0100 Subject: [PATCH 079/110] openvswitch: properly refcount vport-vxlan module After 614732eaa12d, no refcount is maintained for the vport-vxlan module. This allows the userspace to remove such module while vport-vxlan devices still exist, which leads to later oops. v1 -> v2: - move vport 'owner' initialization in ovs_vport_ops_register() and make such function a macro Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/openvswitch/vport-geneve.c | 1 - net/openvswitch/vport-gre.c | 1 - net/openvswitch/vport.c | 4 ++-- net/openvswitch/vport.h | 8 +++++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index efb736bb6855..e41cd12d9b2d 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -117,7 +117,6 @@ static struct vport_ops ovs_geneve_vport_ops = { .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, .send = dev_queue_xmit, - .owner = THIS_MODULE, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index c3257d78d3d2..7f8897f33a67 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -89,7 +89,6 @@ static struct vport_ops ovs_gre_vport_ops = { .create = gre_create, .send = dev_queue_xmit, .destroy = ovs_netdev_tunnel_destroy, - .owner = THIS_MODULE, }; static int __init ovs_gre_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index e194c10a1889..31cbc8c5c7db 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -71,7 +71,7 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name) return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } -int ovs_vport_ops_register(struct vport_ops *ops) +int __ovs_vport_ops_register(struct vport_ops *ops) { int err = -EEXIST; struct vport_ops *o; @@ -87,7 +87,7 @@ errout: ovs_unlock(); return err; } -EXPORT_SYMBOL_GPL(ovs_vport_ops_register); +EXPORT_SYMBOL_GPL(__ovs_vport_ops_register); void ovs_vport_ops_unregister(struct vport_ops *ops) { diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index bdfd82a7c064..8ea3a96980ac 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -196,7 +196,13 @@ static inline const char *ovs_vport_name(struct vport *vport) return vport->dev->name; } -int ovs_vport_ops_register(struct vport_ops *ops); +int __ovs_vport_ops_register(struct vport_ops *ops); +#define ovs_vport_ops_register(ops) \ + ({ \ + (ops)->owner = THIS_MODULE; \ + __ovs_vport_ops_register(ops); \ + }) + void ovs_vport_ops_unregister(struct vport_ops *ops); static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, From 723cda5b055851f5e8bf61aacd8008c43c99e801 Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Wed, 2 Dec 2015 15:36:13 +0530 Subject: [PATCH 080/110] net: thunderx: Force to load octeon-mdio before bgx driver. Signed-off-by: Thanneeru Srinivasulu Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 3 +++ drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 180aa9fabf48..2574a7ea1c0e 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1009,6 +1009,9 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct bgx *bgx = NULL; u8 lmac; + /* Load octeon mdio driver */ + octeon_mdiobus_force_mod_depencency(); + bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); if (!bgx) return -ENOMEM; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 07b7ec66c60d..89a02fa26f79 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -182,6 +182,7 @@ enum MCAST_MODE { #define BCAST_ACCEPT 1 #define CAM_ACCEPT 1 +void octeon_mdiobus_force_mod_depencency(void); void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac); unsigned bgx_get_map(int node); int bgx_get_lmac_count(int node, int bgx); From a7b1f535a8d45816cfe25c0fd900fc726ba5acce Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Wed, 2 Dec 2015 15:36:14 +0530 Subject: [PATCH 081/110] net: thunderx: Wait for delayed work to finish before destroying it While VNIC or BGX driver teardown, wait for already scheduled delayed work to finish before destroying it. Signed-off-by: Thanneeru Srinivasulu Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 3 +-- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index c561fdcb79a7..cfc24a1daff1 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1074,8 +1074,7 @@ static void nic_remove(struct pci_dev *pdev) if (nic->check_link) { /* Destroy work Queue */ - cancel_delayed_work(&nic->dwork); - flush_workqueue(nic->check_link); + cancel_delayed_work_sync(&nic->dwork); destroy_workqueue(nic->check_link); } diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 2574a7ea1c0e..6534b7362091 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -695,8 +695,7 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; if (lmac->check_link) { /* Destroy work queue */ - cancel_delayed_work(&lmac->dwork); - flush_workqueue(lmac->check_link); + cancel_delayed_work_sync(&lmac->dwork); destroy_workqueue(lmac->check_link); } From 006394a7cb20559418c602b8433ec1839b6fc1d3 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Wed, 2 Dec 2015 15:36:15 +0530 Subject: [PATCH 082/110] net: thunderx: Set CQ timer threshold properly Properly set CQ timer threshold and also set it to 2us. With previous incorrect settings it was set to 0.5us which is too less. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 5 ++--- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2 +- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index d3950b20feb9..39ca6744a4e6 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -120,10 +120,9 @@ * Calculated for SCLK of 700Mhz * value written should be a 1/16th of what is expected * - * 1 tick per 0.05usec = value of 2.2 - * This 10% would be covered in CQ timer thresh value + * 1 tick per 0.025usec */ -#define NICPF_CLK_PER_INT_TICK 2 +#define NICPF_CLK_PER_INT_TICK 1 /* Time to wait before we decide that a SQ is stuck. * diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index e404ea837727..206b6a71a545 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -592,7 +592,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, /* Set threshold value for interrupt generation */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh); nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, - qidx, nic->cq_coalesce_usecs); + qidx, CMP_QUEUE_TIMER_THRESH); } /* Configures transmit queue */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index fb4957d09914..033e8306e91c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -76,7 +76,7 @@ #define CMP_QSIZE CMP_QUEUE_SIZE2 #define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) #define CMP_QUEUE_CQE_THRESH 0 -#define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ +#define CMP_QUEUE_TIMER_THRESH 80 /* ~2usec */ #define RBDR_SIZE RBDR_SIZE0 #define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) From 0b72a9a1060e7547e71e7f600849a2d3006bf63a Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Wed, 2 Dec 2015 15:36:16 +0530 Subject: [PATCH 083/110] net: thunderx: Switchon carrier only upon interface link up Call netif_carrier_on() only if interface's link is up. Switching this on upon IFF_UP by default, is causing issues with ethernet channel bonding in LACP mode. Initial NETDEV_CHANGE notification was being skipped. Also fixed some issues with link/speed/duplex reporting via ethtool. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 16 +++++++++++++++- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 +--- .../net/ethernet/cavium/thunder/thunder_bgx.c | 2 ++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index af54c10945c2..a12b2e38cf61 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -112,6 +112,13 @@ static int nicvf_get_settings(struct net_device *netdev, cmd->supported = 0; cmd->transceiver = XCVR_EXTERNAL; + + if (!nic->link_up) { + cmd->duplex = DUPLEX_UNKNOWN; + ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + return 0; + } + if (nic->speed <= 1000) { cmd->port = PORT_MII; cmd->autoneg = AUTONEG_ENABLE; @@ -125,6 +132,13 @@ static int nicvf_get_settings(struct net_device *netdev, return 0; } +static u32 nicvf_get_link(struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + + return nic->link_up; +} + static void nicvf_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -660,7 +674,7 @@ static int nicvf_set_channels(struct net_device *dev, static const struct ethtool_ops nicvf_ethtool_ops = { .get_settings = nicvf_get_settings, - .get_link = ethtool_op_get_link, + .get_link = nicvf_get_link, .get_drvinfo = nicvf_get_drvinfo, .get_msglevel = nicvf_get_msglevel, .set_msglevel = nicvf_set_msglevel, diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 7f709cbdcd87..dde8dc720cd3 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1057,6 +1057,7 @@ int nicvf_stop(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_stop_all_queues(nic->netdev); + nic->link_up = false; /* Teardown secondary qsets first */ if (!nic->sqs_mode) { @@ -1211,9 +1212,6 @@ int nicvf_open(struct net_device *netdev) nic->drv_stats.txq_stop = 0; nic->drv_stats.txq_wake = 0; - netif_carrier_on(netdev); - netif_tx_start_all_queues(netdev); - return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 6534b7362091..d77e41a459b9 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -612,6 +612,8 @@ static void bgx_poll_for_link(struct work_struct *work) lmac->last_duplex = 1; } else { lmac->link_up = 0; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; } if (lmac->last_link != lmac->link_up) { From bc69fdfc6c13b7350be9bcb48328d8f231ed98bb Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Wed, 2 Dec 2015 15:36:17 +0530 Subject: [PATCH 084/110] net: thunderx: Enable BGX LMAC's RX/TX only after VF is up Enable or disable BGX LMAC's RX/TX based on corresponding VF's status. If otherwise, when multiple LMAC's physical link is up then packets from all LMAC's whose corresponding VF is not yet initialized will get forwarded to VF0. This is due to VNIC's default configuration where CPI, RSSI e.t.c point to VF0/QSET0/RQ0. This patch will prevent multiple copies of packets on VF0. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 20 ++++++++++++++++++- .../net/ethernet/cavium/thunder/thunder_bgx.c | 20 +++++++++++++++++-- .../net/ethernet/cavium/thunder/thunder_bgx.h | 1 + 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index cfc24a1daff1..4b7fd63ae57c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -37,6 +37,7 @@ struct nicpf { #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) u8 vf_lmac_map[MAX_LMAC]; + u8 lmac_cnt; struct delayed_work dwork; struct workqueue_struct *check_link; u8 link[MAX_LMAC]; @@ -279,6 +280,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) u64 lmac_credit; nic->num_vf_en = 0; + nic->lmac_cnt = 0; for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { if (!(bgx_map & (1 << bgx))) @@ -288,6 +290,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic->vf_lmac_map[next_bgx_lmac++] = NIC_SET_VF_LMAC_MAP(bgx, lmac); nic->num_vf_en += lmac_cnt; + nic->lmac_cnt += lmac_cnt; /* Program LMAC credits */ lmac_credit = (1ull << 1); /* channel credit enable */ @@ -715,6 +718,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_CFG_DONE: /* Last message of VF config msg sequence */ nic->vf_enabled[vf] = true; + if (vf >= nic->lmac_cnt) + goto unlock; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, true); goto unlock; case NIC_MBOX_MSG_SHUTDOWN: /* First msg in VF teardown sequence */ @@ -722,6 +732,14 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) if (vf >= nic->num_vf_en) nic->sqs_used[vf - nic->num_vf_en] = false; nic->pqs_vf[vf] = 0; + + if (vf >= nic->lmac_cnt) + break; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, false); break; case NIC_MBOX_MSG_ALLOC_SQS: nic_alloc_sqs(nic, &mbx.sqs_alloc); @@ -940,7 +958,7 @@ static void nic_poll_for_link(struct work_struct *work) mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; - for (vf = 0; vf < nic->num_vf_en; vf++) { + for (vf = 0; vf < nic->lmac_cnt; vf++) { /* Poll only if VF is UP */ if (!nic->vf_enabled[vf]) continue; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index d77e41a459b9..9df26c2263bc 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -186,6 +186,23 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) } EXPORT_SYMBOL(bgx_set_lmac_mac); +void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + u64 cfg; + + if (!bgx) + return; + + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + if (enable) + cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; + else + cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); +} +EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); + static void bgx_sgmii_change_link_state(struct lmac *lmac) { struct bgx *bgx = lmac->bgx; @@ -656,8 +673,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) } /* Enable lmac */ - bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, - CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN); + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); /* Restore default cfg, incase low level firmware changed it */ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 89a02fa26f79..149e179363a1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -183,6 +183,7 @@ enum MCAST_MODE { #define CAM_ACCEPT 1 void octeon_mdiobus_force_mod_depencency(void); +void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable); void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac); unsigned bgx_get_map(int node); int bgx_get_lmac_count(int node, int bgx); From 835112b28919d88d989a0a9313e323ad82e18b59 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 17 Nov 2015 10:24:40 +0200 Subject: [PATCH 085/110] mac80211: don't teardown sdata on sdata stop Interfaces are being initialized (setup) on addition, and torn down on removal. However, p2p device is being torn down when stopped, resulting in the next p2p start operation being done on uninitialized interface. Solve it by calling ieee80211_teardown_sdata() only on interface removal (for the non-netdev case). Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach [squashed in fix to call teardown after unregister] Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 53ee049efbff..c9e325d2e120 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1862,6 +1862,7 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) unregister_netdevice(sdata->dev); } else { cfg80211_unregister_wdev(&sdata->wdev); + ieee80211_teardown_sdata(sdata); kfree(sdata); } } @@ -1871,7 +1872,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state))) return; ieee80211_do_stop(sdata, true); - ieee80211_teardown_sdata(sdata); } void ieee80211_remove_interfaces(struct ieee80211_local *local) From 4e39ccac0d678eacb5dd6ffc5057531af33c12d6 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 21 Nov 2015 18:13:40 +0800 Subject: [PATCH 086/110] mac80211: do not actively scan DFS channels DFS channels should not be actively scanned as we can't be sure if we are allowed or not. If the current channel is in the DFS band, active scan might be performed after CSA, but we have no guarantee about other channels, therefore it is safer to prevent active scanning at all. Cc: stable@vger.kernel.org Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4aeca4b0c3cb..a413e52f7691 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -597,8 +597,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, /* We need to ensure power level is at max for scanning. */ ieee80211_hw_config(local, 0); - if ((req->channels[0]->flags & - IEEE80211_CHAN_NO_IR) || + if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR | + IEEE80211_CHAN_RADAR)) || !req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; } else { @@ -645,7 +645,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan) * TODO: channel switching also consumes quite some time, * add that delay as well to get a better estimation */ - if (chan->flags & IEEE80211_CHAN_NO_IR) + if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) return IEEE80211_PASSIVE_CHANNEL_TIME; return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; } @@ -777,7 +777,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, * * In any case, it is not necessary for a passive scan. */ - if (chan->flags & IEEE80211_CHAN_NO_IR || !scan_req->n_ssids) { + if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) || + !scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; return; From c1df932c0574c13ab3ce72e969c9647ff3aaad68 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Nov 2015 21:59:41 +0100 Subject: [PATCH 087/110] mac80211: fix off-channel mgmt-tx uninitialized variable usage In the last change here, I neglected to update the cookie in one code path: when a mgmt-tx has no real cookie sent to userspace as it doesn't wait for a response, but is off-channel. The original code used the SKB pointer as the cookie and always assigned the cookie to the TX SKB in ieee80211_start_roc_work(), but my change turned this around and made the code rely on a valid cookie being passed in. Unfortunately, the off-channel no-wait TX path wasn't assigning one at all, resulting in an uninitialized stack value being used. This wasn't handed back to userspace as a cookie (since in the no-wait case there isn't a cookie), but it was tested for non-zero to distinguish between mgmt-tx and off-channel. Fix this by assigning a dummy non-zero cookie unconditionally, and get rid of a misleading comment and some dead code while at it. I'll clean up the ACK SKB handling separately later. Fixes: 3b79af973cf4 ("mac80211: stop using pointers as userspace cookies") Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c2bd1b6a6922..da471eef07bb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3454,8 +3454,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_unlock; } } else { - /* for cookie below */ - ack_skb = skb; + /* Assign a dummy non-zero cookie, it's not sent to + * userspace in this case but we rely on its value + * internally in the need_offchan case to distinguish + * mgmt-tx from remain-on-channel. + */ + *cookie = 0xffffffff; } if (!need_offchan) { From db6ba9a5371f173489df126739d0a1c2a50f347b Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:41 +0100 Subject: [PATCH 088/110] net: mvneta: add configuration for MBUS windows access protection This commit adds missing configuration of MBUS windows access protection in mvneta_conf_mbus_windows function - a dedicated variable for that purpose remained there unused since v3.8 initial mvneta support. Because of that the register contents were inherited from the bootloader. Signed-off-by: Marcin Wojtas Reviewed-by: Gregory CLEMENT Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e84c7f2634d3..2d8025603329 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -62,6 +62,7 @@ #define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3)) #define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2)) #define MVNETA_BASE_ADDR_ENABLE 0x2290 +#define MVNETA_ACCESS_PROTECT_ENABLE 0x2294 #define MVNETA_PORT_CONFIG 0x2400 #define MVNETA_UNI_PROMISC_MODE BIT(0) #define MVNETA_DEF_RXQ(q) ((q) << 1) @@ -3191,6 +3192,7 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, } mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); + mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); } /* Power up the port */ From e5bdf689d32fcf3aaf548c71e715b303ba20b5d1 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:42 +0100 Subject: [PATCH 089/110] net: mvneta: fix bit assignment in MVNETA_RXQ_CONFIG_REG MVNETA_RXQ_HW_BUF_ALLOC bit which controls enabling hardware buffer allocation was mistakenly set as BIT(1). This commit fixes the assignment. Signed-off-by: Marcin Wojtas Reviewed-by: Gregory CLEMENT Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2d8025603329..64c46f03978c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -36,7 +36,7 @@ /* Registers */ #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) -#define MVNETA_RXQ_HW_BUF_ALLOC BIT(1) +#define MVNETA_RXQ_HW_BUF_ALLOC BIT(0) #define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8) #define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8) #define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2)) From dc1aadf6f1e7609590fadf7a0252413732289b2e Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:43 +0100 Subject: [PATCH 090/110] net: mvneta: fix bit assignment for RX packet irq enable A value originally defined in the driver was inappropriate. Even though the ingress was somehow working, writing MVNETA_RXQ_INTR_ENABLE_ALL_MASK to MVNETA_INTR_ENABLE didn't make any effect, because the bits [31:16] are reserved and read-only. This commit updates MVNETA_RXQ_INTR_ENABLE_ALL_MASK to be compliant with the controller's documentation. Signed-off-by: Marcin Wojtas Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 64c46f03978c..5dffb6831236 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -160,7 +160,7 @@ #define MVNETA_INTR_ENABLE 0x25b8 #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00 -#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF +#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0x000000ff #define MVNETA_RXQ_CMD 0x2680 #define MVNETA_RXQ_DISABLE_SHIFT 8 From 26c17a179f3f64f92de6e837c14279a6431a7ab6 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:44 +0100 Subject: [PATCH 091/110] net: mvneta: fix error path for building skb In the actual RX processing, there is same error path for both descriptor ring refilling and building skb fails. This is not correct, because after successful refill, the ring is already updated with newly allocated buffer. Then, in case of build_skb() fail, hitherto code left the original buffer unmapped. This patch fixes above situation by swapping error check of skb build with DMA-unmap of original buffer. Signed-off-by: Marcin Wojtas Acked-by: Simon Guinot Cc: # v4.2+ Fixes a84e32894191 ("net: mvneta: fix refilling for Rx DMA buffers") Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5dffb6831236..5a98c5d61a2b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1580,12 +1580,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); - if (!skb) - goto err_drop_frame; + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. + */ dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + rcvd_pkts++; rcvd_bytes += rx_bytes; From 9110ee07762a8f04835878863be2449362c63508 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:45 +0100 Subject: [PATCH 092/110] net: mvneta: enable setting custom TX IP checksum limit Since Armada 38x SoC can support IP checksum for jumbo frames only on a single port, it means that this feature should be enabled per-port, rather than for the whole SoC. This patch enables setting custom TX IP checksum limit by adding new optional property to the mvneta device tree node. If not used, by default 1600B is set for "marvell,armada-370-neta" and 9800B for other strings, which ensures backward compatibility. Binding documentation is updated accordingly. Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- .../bindings/net/marvell-armada-370-neta.txt | 6 ++++++ drivers/net/ethernet/marvell/mvneta.c | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index f5a8ca29aff0..aeea50c84e92 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -8,6 +8,11 @@ Required properties: - phy-mode: See ethernet.txt file in the same directory - clocks: a pointer to the reference clock for this device. +Optional properties: +- tx-csum-limit: maximum mtu supported by port that allow TX checksum. + Value is presented in bytes. If not used, by default 1600B is set for + "marvell,armada-370-neta" and 9800B for others. + Example: ethernet@d0070000 { @@ -15,6 +20,7 @@ ethernet@d0070000 { reg = <0xd0070000 0x2500>; interrupts = <8>; clocks = <&gate_clk 4>; + tx-csum-limit = <9800> status = "okay"; phy = <&phy0>; phy-mode = "rgmii-id"; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5a98c5d61a2b..ed622fa29dfa 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -243,6 +243,7 @@ #define MVNETA_VLAN_TAG_LEN 4 #define MVNETA_CPU_D_CACHE_LINE_SIZE 32 +#define MVNETA_TX_CSUM_DEF_SIZE 1600 #define MVNETA_TX_CSUM_MAX_SIZE 9800 #define MVNETA_ACC_MODE_EXT 1 @@ -3256,6 +3257,7 @@ static int mvneta_probe(struct platform_device *pdev) char hw_mac_addr[ETH_ALEN]; const char *mac_from; const char *managed; + int tx_csum_limit; int phy_mode; int err; int cpu; @@ -3356,8 +3358,21 @@ static int mvneta_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(dn, "marvell,armada-370-neta")) - pp->tx_csum_limit = 1600; + if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) { + if (tx_csum_limit < 0 || + tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + dev_info(&pdev->dev, + "Wrong TX csum limit in DT, set to %dB\n", + MVNETA_TX_CSUM_DEF_SIZE); + } + } else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + } else { + tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE; + } + + pp->tx_csum_limit = tx_csum_limit; pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; From c4a25007cfccbf19e6481885af378bca5e681683 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:46 +0100 Subject: [PATCH 093/110] mvebu: dts: enable IP checksum with jumbo frames for Armada 38x on Port0 The Ethernet controller found in the Armada 38x SoC's family support TCP/IP checksumming with frame sizes larger than 1600 bytes, however only on port 0. This commit enables it by setting 'tx-csum-limit' to 9800B in 'ethernet@70000' node. Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- arch/arm/boot/dts/armada-38x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index c6a0e9d7f1a9..e8b7f6726772 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -498,6 +498,7 @@ reg = <0x70000 0x4000>; interrupts-extended = <&mpic 8>; clocks = <&gateclk 4>; + tx-csum-limit = <9800>; status = "disabled"; }; From 01b3f52157ff5a47d6d8d796f396a4b34a53c61d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 29 Nov 2015 16:59:35 -0800 Subject: [PATCH 094/110] bpf: fix allocation warnings in bpf maps and integer overflow For large map->value_size the user space can trigger memory allocation warnings like: WARNING: CPU: 2 PID: 11122 at mm/page_alloc.c:2989 __alloc_pages_nodemask+0x695/0x14e0() Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x68/0x92 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:493 [< inline >] __alloc_pages_slowpath mm/page_alloc.c:2989 [] __alloc_pages_nodemask+0x695/0x14e0 mm/page_alloc.c:3235 [] alloc_pages_current+0xee/0x340 mm/mempolicy.c:2055 [< inline >] alloc_pages include/linux/gfp.h:451 [] alloc_kmem_pages+0x16/0xf0 mm/page_alloc.c:3414 [] kmalloc_order+0x19/0x60 mm/slab_common.c:1007 [] kmalloc_order_trace+0x1f/0xa0 mm/slab_common.c:1018 [< inline >] kmalloc_large include/linux/slab.h:390 [] __kmalloc+0x234/0x250 mm/slub.c:3525 [< inline >] kmalloc include/linux/slab.h:463 [< inline >] map_update_elem kernel/bpf/syscall.c:288 [< inline >] SYSC_bpf kernel/bpf/syscall.c:744 To avoid never succeeding kmalloc with order >= MAX_ORDER check that elem->value_size and computed elem_size are within limits for both hash and array type maps. Also add __GFP_NOWARN to kmalloc(value_size | elem_size) to avoid OOM warnings. Note kmalloc(key_size) is highly unlikely to trigger OOM, since key_size <= 512, so keep those kmalloc-s as-is. Large value_size can cause integer overflows in elem_size and map.pages formulas, so check for that as well. Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") Reported-by: Dmitry Vyukov Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 8 +++++++- kernel/bpf/hashtab.c | 34 +++++++++++++++++++++++++--------- kernel/bpf/syscall.c | 4 ++-- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 4c67ce39732e..b0799bced518 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) attr->value_size == 0) return ERR_PTR(-EINVAL); + if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + return ERR_PTR(-E2BIG); + elem_size = round_up(attr->value_size, 8); /* check round_up into zero and u32 overflow */ if (elem_size == 0 || - attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size) + attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) return ERR_PTR(-ENOMEM); array_size = sizeof(*array) + attr->max_entries * elem_size; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 19909b22b4f8..34777b3746fa 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; - err = -ENOMEM; + if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) - + MAX_BPF_STACK - sizeof(struct htab_elem)) + /* if value_size is bigger, the user space won't be able to + * access the elements via bpf syscall. This check also makes + * sure that the elem_size doesn't overflow and it's + * kmalloc-able later in htab_map_update_elem() + */ + goto free_htab; + + htab->elem_size = sizeof(struct htab_elem) + + round_up(htab->map.key_size, 8) + + htab->map.value_size; + /* prevent zero size kmalloc and check for u32 overflow */ if (htab->n_buckets == 0 || htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) goto free_htab; + if ((u64) htab->n_buckets * sizeof(struct hlist_head) + + (u64) htab->elem_size * htab->map.max_entries >= + U32_MAX - PAGE_SIZE) + /* make sure page count doesn't overflow */ + goto free_htab; + + htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + + htab->elem_size * htab->map.max_entries, + PAGE_SIZE) >> PAGE_SHIFT; + + err = -ENOMEM; htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), GFP_USER | __GFP_NOWARN); @@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->lock); htab->count = 0; - htab->elem_size = sizeof(struct htab_elem) + - round_up(htab->map.key_size, 8) + - htab->map.value_size; - - htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + - htab->elem_size * htab->map.max_entries, - PAGE_SIZE) >> PAGE_SHIFT; return &htab->map; free_htab: @@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, WARN_ON_ONCE(!rcu_read_lock_held()); /* allocate new element outside of lock */ - l_new = kmalloc(htab->elem_size, GFP_ATOMIC); + l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); if (!l_new) return -ENOMEM; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4a8f3c1d7da6..3b39550d8485 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -240,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr) goto free_key; err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER); + value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; @@ -299,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr) goto free_key; err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER); + value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; From 45f6fad84cc305103b28d73482b344d7f5b76f39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: [PATCH 095/110] ipv6: add complete rcu protection around np->opt This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/ipv6.h | 21 +++++++++++++++++++- net/dccp/ipv6.c | 33 ++++++++++++++++++++------------ net/ipv6/af_inet6.c | 13 +++++++++---- net/ipv6/datagram.c | 4 +++- net/ipv6/exthdrs.c | 3 ++- net/ipv6/inet6_connection_sock.c | 11 ++++++++--- net/ipv6/ipv6_sockglue.c | 33 +++++++++++++++++++++----------- net/ipv6/raw.c | 8 ++++++-- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 28 ++++++++++++++++----------- net/ipv6/udp.c | 8 ++++++-- net/l2tp/l2tp_ip6.c | 8 ++++++-- 13 files changed, 122 insertions(+), 52 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0ef2a97ccdb5..402753bccafa 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -227,7 +227,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ea5a13ef85a6..9a5c9f013784 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -217,7 +218,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; @@ -252,6 +253,24 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index db5fc2440a23..e7e0b9bc2a43 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (np->opt != NULL) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -876,9 +886,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 44bb66bde0e2..38d66ddfb937 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index d70b0238f468..517c55b01ba8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,8 +167,10 @@ ipv4_connected: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ce203b0402be..ea7c4d64a00a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5d1c7cee2cb2..3ff5208772bb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; @@ -142,7 +144,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { @@ -175,7 +179,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 63e6956917c9..4449ad1f8114 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); sk_dst_reset(sk); return opt; @@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -486,6 +493,7 @@ sticky_done: break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -502,8 +510,10 @@ update: retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dc65ec198f7c..99140986e887 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; @@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -906,6 +909,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb8f2fa1c7fb..eaf7ac496d50 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -222,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = ireq->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c5429a636f1a..6a50bb4a0dae 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -461,7 +463,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } @@ -972,6 +975,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; @@ -1098,13 +1102,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * but we make one more one thing there: reattach optmem to newsk. */ - if (np->opt) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 01bcb49619ee..9da3287a3923 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1110,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1263,8 +1264,10 @@ do_udp_sendmsg: opt = NULL; connected = 0; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1373,6 +1376,7 @@ release_dst: out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index aca38d8aed8e..a2c8747d2936 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; @@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -631,6 +634,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; From 38ee8fb67c3457f36f5137073c4b8ac2436d2393 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 30 Nov 2015 12:17:06 -0200 Subject: [PATCH 096/110] sctp: convert sack_needed and sack_generation to bits They don't need to be any bigger than that and with this we start a new bitfield for tracking association runtime stuff, like zero window situation. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 495c87e367b3..7bbb71081aeb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,10 +775,10 @@ struct sctp_transport { hb_sent:1, /* Is the Path MTU update pending on this tranport */ - pmtu_pending:1; + pmtu_pending:1, - /* Has this transport moved the ctsn since we last sacked */ - __u32 sack_generation; + /* Has this transport moved the ctsn since we last sacked */ + sack_generation:1; u32 dst_cookie; struct flowi fl; @@ -1482,19 +1482,19 @@ struct sctp_association { prsctp_capable:1, /* Can peer do PR-SCTP? */ auth_capable:1; /* Is peer doing SCTP-AUTH? */ - /* Ack State : This flag indicates if the next received + /* sack_needed : This flag indicates if the next received * : packet is to be responded to with a - * : SACK. This is initializedto 0. When a packet - * : is received it is incremented. If this value + * : SACK. This is initialized to 0. When a packet + * : is received sack_cnt is incremented. If this value * : reaches 2 or more, a SACK is sent and the * : value is reset to 0. Note: This is used only * : when no DATA chunks are received out of * : order. When DATA chunks are out of order, * : SACK's are not delayed (see Section 6). */ - __u8 sack_needed; /* Do we need to sack the peer? */ + __u8 sack_needed:1, /* Do we need to sack the peer? */ + sack_generation:1; __u32 sack_cnt; - __u32 sack_generation; __u32 adaptation_ind; /* Adaptation Code point. */ From cacc06215271104b40773c99547c506095db6ad4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 30 Nov 2015 14:32:54 -0200 Subject: [PATCH 097/110] sctp: use GFP_USER for user-controlled kmalloc Dmitry Vyukov reported that the user could trigger a kernel warning by using a large len value for getsockopt SCTP_GET_LOCAL_ADDRS, as that value directly affects the value used as a kmalloc() parameter. This patch thus switches the allocation flags from all user-controllable kmalloc size to GFP_USER to put some more restrictions on it and also disables the warn, as they are not necessary. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5e35ef34008b..f6161e356734 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -972,7 +972,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, return -EFAULT; /* Alloc space for the address array in kernel memory. */ - kaddrs = kmalloc(addrs_size, GFP_KERNEL); + kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN); if (unlikely(!kaddrs)) return -ENOMEM; @@ -4928,7 +4928,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, to = optval + offsetof(struct sctp_getaddrs, addrs); space_left = len - offsetof(struct sctp_getaddrs, addrs); - addrs = kmalloc(space_left, GFP_KERNEL); + addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN); if (!addrs) return -ENOMEM; From 6adc5fd6a142c6e2c80574c1db0c7c17dedaa42e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 1 Dec 2015 01:14:48 +0300 Subject: [PATCH 098/110] net/neighbour: fix crash at dumping device-agnostic proxy entries Proxy entries could have null pointer to net-device. Signed-off-by: Konstantin Khlebnikov Fixes: 84920c1420e2 ("net: Allow ipv6 proxies and arp proxies be shown with iproute2") Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e6af42da28d9..f18ae91b652e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; - ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) @@ -2333,7 +2333,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (pneigh_net(n) != net) continue; if (idx < s_idx) goto next; From c836a8ba93869d6a0290a6ae0047fbef09066871 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Dec 2015 21:48:14 -0800 Subject: [PATCH 099/110] ipv6: sctp: add rcu protection around np->opt This patch completes the work I did in commit 45f6fad84cc3 ("ipv6: add complete rcu protection around np->opt"), as I missed sctp part. This simply makes sure np->opt is used with proper RCU locking and accessors. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e917d27328ea..acb45b8c2a9d 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -209,6 +209,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 *fl6 = &transport->fl.u.ip6; + int res; pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); @@ -220,7 +221,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); - return ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + rcu_read_lock(); + res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + rcu_read_unlock(); + return res; } /* Returns the dst cache entry for the given source and destination ip @@ -262,7 +266,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, pr_debug("src=%pI6 - ", &fl6->saddr); } - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!asoc || saddr) goto out; @@ -321,7 +328,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (baddr) { fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; - final_p = fl6_update_dst(fl6, np->opt, &final); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); dst = ip6_dst_lookup_flow(sk, fl6, final_p); } From 6bd4f355df2eae80b8a5c7b097371cd1e05f20d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Dec 2015 21:53:57 -0800 Subject: [PATCH 100/110] ipv6: kill sk_dst_lock While testing the np->opt RCU conversion, I found that UDP/IPv6 was using a mixture of xchg() and sk_dst_lock to protect concurrent changes to sk->sk_dst_cache, leading to possible corruptions and crashes. ip6_sk_dst_lookup_flow() uses sk_dst_check() anyway, so the simplest way to fix the mess is to remove sk_dst_lock completely, as we did for IPv4. __ip6_dst_store() and ip6_dst_store() share same implementation. sk_setup_caps() being called with socket lock being held or not, we have to use sk_dst_set() instead of __sk_dst_set() Note that I had to move the "np->dst_cookie = rt6_get_cookie(rt);" in ip6_dst_store() before the sk_setup_caps(sk, dst) call. This is because ip6_dst_store() can be called from process context, without any lock held. As soon as the dst is installed in sk->sk_dst_cache, dst can be freed from another cpu doing a concurrent ip6_dst_store() Doing the dst dereference before doing the install is needed to make sure no use after free would trigger. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/net/ip6_route.h | 17 ++++------------- include/net/sock.h | 3 +-- net/core/sock.c | 4 +--- net/dccp/ipv6.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/icmp.c | 14 -------------- net/ipv6/inet6_connection_sock.c | 10 +--------- net/ipv6/tcp_ipv6.c | 4 ++-- 8 files changed, 12 insertions(+), 46 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2bfb2ad2fab1..877f682989b8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); /* * Store a destination cache entry in a socket */ -static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct rt6_info *rt = (struct rt6_info *) dst; + np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif - np->dst_cookie = rt6_get_cookie(rt); -} - -static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - spin_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr, saddr); - spin_unlock(&sk->sk_dst_lock); } static inline bool ipv6_unicast_destination(const struct sk_buff *skb) diff --git a/include/net/sock.h b/include/net/sock.h index 0434138c5f95..52d27ee924f4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -254,7 +254,6 @@ struct cg_proto; * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache - * @sk_dst_lock: destination cache lock * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed @@ -393,7 +392,7 @@ struct sock { #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - spinlock_t sk_dst_lock; + /* Note: 32bit hole on 64bit arches */ atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; diff --git a/net/core/sock.c b/net/core/sock.c index 9d79569935a3..e31dfcee1729 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1530,7 +1530,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); - spin_lock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); lockdep_set_class_and_name(&newsk->sk_callback_lock, af_callback_keys + newsk->sk_family, @@ -1607,7 +1606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - __sk_dst_set(sk, dst); + sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; @@ -2388,7 +2387,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sk_wq = NULL; - spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e7e0b9bc2a43..9c6d0508e63a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -459,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, * comment in that function for the gory details. -acme */ - __ip6_dst_store(newsk, dst, NULL, NULL); + ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; @@ -883,7 +883,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; - __ip6_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (opt) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 38d66ddfb937..8ec0df75f1c4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -673,7 +673,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return PTR_ERR(dst); } - __ip6_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); } return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 36c5a98b0472..0a37ddc7af51 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } -/* - * Special lock-class for __icmpv6_sk: - */ -static struct lock_class_key icmpv6_socket_sk_dst_lock_key; - static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; @@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net) net->ipv6.icmp_sk[i] = sk; - /* - * Split off their lock-class, because sk->sk_dst_lock - * gets used from softirqs, which is safe for - * __icmpv6_sk (because those never get directly used - * via userspace syscalls), but unsafe for normal sockets. - */ - lockdep_set_class(&sk->sk_dst_lock, - &icmpv6_socket_sk_dst_lock_key); - /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3ff5208772bb..a7ca2cde2ecb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -110,14 +110,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) } EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); -static inline -void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, - const struct in6_addr *daddr, - const struct in6_addr *saddr) -{ - __ip6_dst_store(sk, dst, daddr, saddr); -} - static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { @@ -153,7 +145,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!IS_ERR(dst)) - __inet6_csk_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); } return dst; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6a50bb4a0dae..e7aab561b7b4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -257,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && @@ -1060,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ newsk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(newsk, dst, NULL, NULL); + ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock *)newsk; From df849ba3a88cdf4480decd7008f95ff627387c0f Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 30 Nov 2015 14:24:07 -0800 Subject: [PATCH 101/110] arm64: bpf: add 'store immediate' instruction aarch64 doesn't have native store immediate instruction, such operation has to be implemented by the below instruction sequence: Load immediate to register Store register Signed-off-by: Yang Shi CC: Zi Shen Lim CC: Xi Wang Reviewed-by: Zi Shen Lim Signed-off-by: David S. Miller --- arch/arm64/net/bpf_jit_comp.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 7cf032bebf8c..b162ad70effc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -590,7 +590,25 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: - goto notyet; + /* Load imm to a register then store it */ + ctx->tmp_used = 1; + emit_a64_mov_i(1, tmp2, off, ctx); + emit_a64_mov_i(1, tmp, imm, ctx); + switch (BPF_SIZE(code)) { + case BPF_W: + emit(A64_STR32(tmp, dst, tmp2), ctx); + break; + case BPF_H: + emit(A64_STRH(tmp, dst, tmp2), ctx); + break; + case BPF_B: + emit(A64_STRB(tmp, dst, tmp2), ctx); + break; + case BPF_DW: + emit(A64_STR64(tmp, dst, tmp2), ctx); + break; + } + break; /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_W: From 602dd62dfbda3e63a2d6a3cbde953ebe82bf5087 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 07:20:07 -0800 Subject: [PATCH 102/110] ipv6: sctp: implement sctp_v6_destroy_sock() Dmitry Vyukov reported a memory leak using IPV6 SCTP sockets. We need to call inet6_destroy_sock() to properly release inet6 specific fields. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/socket.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6161e356734..03c8256063ec 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7379,6 +7379,13 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) +#include +static void sctp_v6_destroy_sock(struct sock *sk) +{ + sctp_destroy_sock(sk); + inet6_destroy_sock(sk); +} + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -7388,7 +7395,7 @@ struct proto sctpv6_prot = { .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_init_sock, - .destroy = sctp_destroy_sock, + .destroy = sctp_v6_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, From 4eba7bb1d72d9bde67d810d09bf62dc207b63c5c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 1 Dec 2015 16:31:08 +0100 Subject: [PATCH 103/110] ipv4: igmp: Allow removing groups from a removed interface When a multicast group is joined on a socket, a struct ip_mc_socklist is appended to the sockets mc_list containing information about the joined group. If the interface is hot unplugged, this entry becomes stale. Prior to commit 52ad353a5344f ("igmp: fix the problem when mc leave group") it was possible to remove the stale entry by performing a IP_DROP_MEMBERSHIP, passing either the old ifindex or ip address on the interface. However, this fix enforces that the interface must still exist. Thus with time, the number of stale entries grows, until sysctl_igmp_max_memberships is reached and then it is not possible to join and more groups. The previous patch fixes an issue where a IP_DROP_MEMBERSHIP is performed without specifying the interface, either by ifindex or ip address. However here we do supply one of these. So loosen the restriction on device existence to only apply when the interface has not been specified. This then restores the ability to clean up the stale entries. Signed-off-by: Andrew Lunn Fixes: 52ad353a5344f "(igmp: fix the problem when mc leave group") Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6baf36e11808..05e4cba14162 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2126,7 +2126,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) ASSERT_RTNL(); in_dev = ip_mc_find_dev(net, imr); - if (!in_dev) { + if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) { ret = -ENODEV; goto out; } @@ -2147,7 +2147,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; - ip_mc_dec_group(in_dev, group); + if (in_dev) + ip_mc_dec_group(in_dev, group); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); From 13175303024c8f4cd09e51079a8fcbbe572111ec Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 1 Dec 2015 18:33:36 +0100 Subject: [PATCH 104/110] openvswitch: fix hangup on vxlan/gre/geneve device deletion Each openvswitch tunnel vport (vxlan,gre,geneve) holds a reference to the underlying tunnel device, but never released it when such device is deleted. Deleting the underlying device via the ip tool cause the kernel to hangup in the netdev_wait_allrefs() loop. This commit ensure that on device unregistration dp_detach_port_notify() is called for all vports that hold the device reference, properly releasing it. Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device") Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport") Fixes: 6b001e682e90 ("openvswitch: Use Geneve device.") Signed-off-by: Paolo Abeni Acked-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/dp_notify.c | 2 +- net/openvswitch/vport-netdev.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index a7a80a6b77b0..653d073bae45 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work) struct hlist_node *n; hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { - if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) + if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL) continue; if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH)) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b327368a3848..6b0190b987ec 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) if (vport->dev->priv_flags & IFF_OVS_DATAPATH) ovs_netdev_detach_dev(vport); - /* Early release so we can unregister the device */ + /* We can be invoked by both explicit vport deletion and + * underlying netdev deregistration; delete the link only + * if it's not already shutting down. + */ + if (vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev); dev_put(vport->dev); - rtnl_delete_link(vport->dev); vport->dev = NULL; rtnl_unlock(); From 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 20:08:51 -0800 Subject: [PATCH 105/110] net_sched: fix qdisc_tree_decrease_qlen() races qdisc_tree_decrease_qlen() suffers from two problems on multiqueue devices. One problem is that it updates sch->q.qlen and sch->qstats.drops on the mq/mqprio root qdisc, while it should not : Daniele reported underflows errors : [ 681.774821] PAX: sch->q.qlen: 0 n: 1 [ 681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head; [ 681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G O 4.2.6.201511282239-1-grsec #1 [ 681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015 [ 681.774956] ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c [ 681.774959] ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b [ 681.774960] ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001 [ 681.774962] Call Trace: [ 681.774967] [] dump_stack+0x4c/0x7f [ 681.774970] [] report_size_overflow+0x34/0x50 [ 681.774972] [] qdisc_tree_decrease_qlen+0x152/0x160 [ 681.774976] [] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel] [ 681.774978] [] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel] [ 681.774980] [] __qdisc_run+0x4d/0x1d0 [ 681.774983] [] net_tx_action+0xc2/0x160 [ 681.774985] [] __do_softirq+0xf1/0x200 [ 681.774987] [] run_ksoftirqd+0x1e/0x30 [ 681.774989] [] smpboot_thread_fn+0x150/0x260 [ 681.774991] [] ? sort_range+0x40/0x40 [ 681.774992] [] kthread+0xe4/0x100 [ 681.774994] [] ? kthread_worker_fn+0x170/0x170 [ 681.774995] [] ret_from_fork+0x3e/0x70 mq/mqprio have their own ways to report qlen/drops by folding stats on all their queues, with appropriate locking. A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup() without proper locking : concurrent qdisc updates could corrupt the list that qdisc_match_from_root() parses to find a qdisc given its handle. Fix first problem adding a TCQ_F_NOPARENT qdisc flag that qdisc_tree_decrease_qlen() can use to abort its tree traversal, as soon as it meets a mq/mqprio qdisc children. Second problem can be fixed by RCU protection. Qdisc are already freed after RCU grace period, so qdisc_list_add() and qdisc_list_del() simply have to use appropriate rcu list variants. A future patch will add a per struct netdev_queue list anchor, so that qdisc_tree_decrease_qlen() can have more efficient lookups. Reported-by: Daniele Fucini Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +++ net/sched/sch_api.c | 27 ++++++++++++++++++--------- net/sched/sch_generic.c | 2 +- net/sched/sch_mq.c | 4 ++-- net/sched/sch_mqprio.c | 4 ++-- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4c79ce8c1f92..b2a8e6338576 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,9 @@ struct Qdisc { */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : + * qdisc_tree_decrease_qlen() should stop. + */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f43c8f33f09e..7ec667dd4ce1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name) } /* We know handle. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) + * (root qdisc, all its children, children of children etc.) + * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) @@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry(q, &root->list, list) { + list_for_each_entry_rcu(q, &root->list, list) { if (q->handle == handle) return q; } @@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q) struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); - list_add_tail(&q->list, &root->list); + ASSERT_RTNL(); + list_add_tail_rcu(&q->list, &root->list); } } EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_del(&q->list); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + ASSERT_RTNL(); + list_del_rcu(&q->list); + } } EXPORT_SYMBOL(qdisc_list_del); @@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (n == 0) return; drops = max_t(int, n, 0); + rcu_read_lock(); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) - return; + break; + if (sch->flags & TCQ_F_NOPARENT) + break; + /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { - WARN_ON(parentid != TC_H_ROOT); - return; + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; } cops = sch->ops->cl_ops; if (cops->qlen_notify) { @@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) sch->q.qlen -= n; __qdisc_qstats_drop(sch, drops); } + rcu_read_unlock(); } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); @@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) - sch->flags |= TCQ_F_ONETXQUEUE; + sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->handle = handle; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cb5d4ad32946..e82a1ad80aa5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev, return; } if (!netif_is_multiqueue(dev)) - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3cbaecd283a..3e82f047caaf 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) if (qdisc == NULL) goto err; priv->qdiscs[ntx] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; @@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3811a745452c..ad70ecf57ce7 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) goto err; } priv->qdiscs[i] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } /* If the mqprio options indicate that hardware should own @@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); From 39198ec98751477313f30569b935503b216f85d0 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 2 Dec 2015 08:12:13 +0200 Subject: [PATCH 106/110] net: lpc_eth: remove irq > NR_IRQS check from probe() If the driver is used on an ARM platform with SPARSE_IRQ defined, semantics of NR_IRQS is different (minimal value of virtual irqs) and by default it is set to 16, see arch/arm/include/asm/irq.h. This value may be less than the actual number of virtual irqs, which may break the driver initialization. The check removal allows to use the driver on such a platform, and, if irq controller driver works correctly, the check is not needed on legacy platforms. Fixes a runtime problem: lpc-eth 31060000.ethernet: error getting resources. lpc_eth: lpc-eth: not found (-6). Signed-off-by: Vladimir Zapolskiy Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index b159ef8303cc..057665180f13 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1326,7 +1326,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Get platform resources */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); - if ((!res) || (irq < 0) || (irq >= NR_IRQS)) { + if (!res || irq < 0) { dev_err(&pdev->dev, "error getting resources.\n"); ret = -ENXIO; goto err_exit; From 1fc2cfd03bbf8f1f8b6b90f0858faba8bd6631c4 Mon Sep 17 00:00:00 2001 From: Jeffrey Huang Date: Wed, 2 Dec 2015 01:54:06 -0500 Subject: [PATCH 107/110] bnxt_en: Fixed incorrect implementation of ndo_set_mac_address The existing ndo_set_mac_address only copies the new MAC addr and didn't set the new MAC addr to the HW. The correct way is to delete the existing default MAC filter from HW and add the new one. Because of RFS filters are also dependent on the default mac filter l2 context, the driver must go thru close_nic() to delete the default MAC and RFS filters, then open_nic() to set the default MAC address to HW. Signed-off-by: Jeffrey Huang Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index db15c5ee09c5..651b5878eba1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5212,13 +5212,22 @@ init_err: static int bnxt_change_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; + struct bnxt *bp = netdev_priv(dev); + int rc = 0; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) + return 0; - return 0; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + if (netif_running(dev)) { + bnxt_close_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); + } + + return rc; } /* rtnl_lock held */ From bdd4347b33f480187b44699cf1caac9400496d6d Mon Sep 17 00:00:00 2001 From: Jeffrey Huang Date: Wed, 2 Dec 2015 01:54:07 -0500 Subject: [PATCH 108/110] bnxt_en: enforce proper storing of MAC address For PF, the bp->pf.mac_addr always holds the permanent MAC addr assigned by the HW. For VF, the bp->vf.mac_addr always holds the administrator assigned VF MAC addr. The random generated VF MAC addr should never get stored to bp->vf.mac_addr. This way, when the VF wants to change the MAC address, we can tell if the adminstrator has already set it and disallow the VF from changing it. v2: Fix compile error if CONFIG_BNXT_SRIOV is not set. Signed-off-by: Jeffrey Huang Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 20 ++++++++++++------- .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 7 +++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 651b5878eba1..f0481dc97ea0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3625,6 +3625,7 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); memcpy(pf->mac_addr, resp->perm_mac_address, ETH_ALEN); + memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN); pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings); @@ -3648,8 +3649,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->fw_fid = le16_to_cpu(resp->fid); memcpy(vf->mac_addr, resp->perm_mac_address, ETH_ALEN); - if (!is_valid_ether_addr(vf->mac_addr)) - random_ether_addr(vf->mac_addr); + if (is_valid_ether_addr(vf->mac_addr)) + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + else + random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ -5218,6 +5222,11 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; +#ifdef CONFIG_BNXT_SRIOV + if (BNXT_VF(bp) && is_valid_ether_addr(bp->vf.mac_addr)) + return -EADDRNOTAVAIL; +#endif + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) return 0; @@ -5695,15 +5704,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); dflt_rings = netif_get_num_default_rss_queues(); - if (BNXT_PF(bp)) { - memcpy(dev->dev_addr, bp->pf.mac_addr, ETH_ALEN); + if (BNXT_PF(bp)) bp->pf.max_irqs = max_irqs; - } else { #if defined(CONFIG_BNXT_SRIOV) - memcpy(dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); + else bp->vf.max_irqs = max_irqs; #endif - } bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings); bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index f4cf68861069..7a9af2887d8e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -804,10 +804,9 @@ void bnxt_update_vf_mac(struct bnxt *bp) if (!is_valid_ether_addr(resp->perm_mac_address)) goto update_vf_mac_exit; - if (ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr)) - goto update_vf_mac_exit; - - memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN); + if (!ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr)) + memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN); + /* overwrite netdev dev_adr with admin VF MAC */ memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); update_vf_mac_exit: mutex_unlock(&bp->hwrm_cmd_lock); From b664f008b0d885db1d5617ed1c51d29a8c04da93 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 2 Dec 2015 01:54:08 -0500 Subject: [PATCH 109/110] bnxt_en: Setup uc_list mac filters after resetting the chip. Call bnxt_cfg_rx_mode() in bnxt_init_chip() to setup uc_list and mc_list mac address filters. Before the patch, uc_list is not setup again after chip reset (such as ethtool ring size change) and macvlans don't work any more after that. Modify bnxt_cfg_rx_mode() to return error codes appropriately so that the init chip sequence can detect any failures. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f0481dc97ea0..bdf094fb6ef9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3884,6 +3884,8 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #endif } +static int bnxt_cfg_rx_mode(struct bnxt *); + static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) { int rc = 0; @@ -3950,11 +3952,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) bp->vnic_info[0].rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; - rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); - if (rc) { - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc); + rc = bnxt_cfg_rx_mode(bp); + if (rc) goto err_out; - } rc = bnxt_hwrm_set_coal(bp); if (rc) @@ -4869,7 +4869,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) } } -static void bnxt_cfg_rx_mode(struct bnxt *bp) +static int bnxt_cfg_rx_mode(struct bnxt *bp) { struct net_device *dev = bp->dev; struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; @@ -4918,6 +4918,7 @@ static void bnxt_cfg_rx_mode(struct bnxt *bp) netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc); vnic->uc_filter_count = i; + return rc; } } @@ -4926,6 +4927,8 @@ skip_uc: if (rc) netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc); + + return rc; } static netdev_features_t bnxt_fix_features(struct net_device *dev, From cf18b7788fe1bf99e9c2ab580b065bf2d3cb1a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Thu, 3 Dec 2015 10:02:35 +0100 Subject: [PATCH 110/110] net: phy: reset only targeted phy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible to address another chip on same MDIO bus. The case is correctly handled for media advertising. It is taken into account only if mii_data->phy_id == phydev->addr. However, this condition was missing for reset case. Signed-off-by: Jérôme Pouiller Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 48ce6ef400fe..47cd306dbb3c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -448,7 +448,8 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) mdiobus_write(phydev->bus, mii_data->phy_id, mii_data->reg_num, val); - if (mii_data->reg_num == MII_BMCR && + if (mii_data->phy_id == phydev->addr && + mii_data->reg_num == MII_BMCR && val & BMCR_RESET) return phy_init_hw(phydev);