From 180032973ee97daddf5c9d733e5b425b108f8679 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 29 Aug 2013 13:26:57 +0300 Subject: [PATCH 01/59] cfg80211: use the correct macro to check for active monitor support Use MONITOR_FLAG_ACTIVE, which is a flag mask, instead of NL80211_MNTR_FLAG_ACTIVE, which is a flag index, when checking if the hardware supports active monitoring. Cc: stable@vger.kernel.org Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index af8d84a4a5b2..626dc3b5fd8d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2421,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } - if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + if (flags && (*flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; @@ -2483,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + if (!err && (flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; From f478f33a93f9353dcd1fe55445343d76b1c3f84a Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Thu, 26 Sep 2013 16:55:28 +0100 Subject: [PATCH 02/59] cfg80211: fix warning when using WEXT for IBSS Fix kernel warning when using WEXT for configuring ad-hoc mode, e.g. "iwconfig wlan0 essid test channel 1" WARNING: at net/wireless/chan.c:373 cfg80211_chandef_usable+0x50/0x21c [cfg80211]() The warning is caused by an uninitialized variable center_freq1. Cc: stable@vger.kernel.org Signed-off-by: Bruno Randolf Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 39bff7d36768..403fe29c024d 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (chan->flags & IEEE80211_CHAN_DISABLED) continue; wdev->wext.ibss.chandef.chan = chan; + wdev->wext.ibss.chandef.center_freq1 = + chan->center_freq; break; } @@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + wdev->wext.ibss.chandef.center_freq1 = freq; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ From 6329b8d917adc077caa60c2447385554130853a3 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 17 Sep 2013 11:15:43 +0200 Subject: [PATCH 03/59] mac80211: drop spoofed packets in ad-hoc mode If an Ad-Hoc node receives packets with the Cell ID or its own MAC address as source address, it hits a WARN_ON in sta_info_insert_check() With many packets, this can massively spam the logs. One way that this can easily happen is through having Cisco APs in the area with rouge AP detection and countermeasures enabled. Such Cisco APs will regularly send fake beacons, disassoc and deauth packets that trigger these warnings. To fix this issue, drop such spoofed packets early in the rx path. Cc: stable@vger.kernel.org Reported-by: Thomas Huehn Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 54395d7583ba..674eac1f996c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3056,6 +3056,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; + if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + return 0; if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { From cc63ec766b7821c8dc4dc1d1e980124aea96b553 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Sat, 7 Sep 2013 23:40:44 -0700 Subject: [PATCH 04/59] mac80211: fix the setting of extended supported rate IE The patch "mac80211: select and adjust bitrates according to channel mode" causes regression and breaks the extended supported rate IE setting. Since "i" is starting with 8, so this is not necessary to introduce "skip" here. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Colleen Twitty Reviewed-by: Jason Abele Signed-off-by: Johannes Berg --- net/mac80211/util.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e1b34a18b243..9c3200bcfc02 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2103,7 +2103,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, skip, shift; + int rate, shift; u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; @@ -2131,14 +2131,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, exrates + 2); *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = exrates; - skip = 0; for (i = 8; i < sband->n_bitrates; i++) { u8 basic = 0; if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; - if (skip++ < 8) - continue; if (need_basic && basic_rates & BIT(i)) basic = 0x80; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, From aa5f66d5a1df1c2b04bccdcb19711675c765d7c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Sep 2013 20:03:45 +0200 Subject: [PATCH 05/59] cfg80211: fix sysfs registration race My locking rework/race fixes caused a regression in the registration, causing uevent notifications for wireless devices before the device is really fully registered and available in nl80211. Fix this by moving the device_add() under rtnl and move the rfkill to afterwards (it can't be under rtnl.) Reported-and-tested-by: Maxime Bizon Signed-off-by: Johannes Berg --- net/wireless/core.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 67153964aad2..fe8d4f2be49b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -566,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - + rtnl_lock(); res = device_add(&rdev->wiphy.dev); - if (res) - return res; - - res = rfkill_register(rdev->rfkill); if (res) { - device_del(&rdev->wiphy.dev); + rtnl_unlock(); return res; } - rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -606,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy) rdev->wiphy.registered = true; rtnl_unlock(); + + res = rfkill_register(rdev->rfkill); + if (res) { + rfkill_destroy(rdev->rfkill); + rdev->rfkill = NULL; + wiphy_unregister(&rdev->wiphy); + return res; + } + return 0; } EXPORT_SYMBOL(wiphy_register); @@ -640,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy) rtnl_unlock(); __count == 0; })); - rfkill_unregister(rdev->rfkill); + if (rdev->rfkill) + rfkill_unregister(rdev->rfkill); rtnl_lock(); rdev->wiphy.registered = false; From dfb6b7c109a7f98d324a759599d1b4616f02c79f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 23 Sep 2013 04:08:13 +0200 Subject: [PATCH 06/59] Revert "rt2x00pci: Use PCI MSIs whenever possible" This reverts commit 9483f40d8d01918b399b4e24d0c1111db0afffeb. Some devices stop to connect with above commit, see: https://bugzilla.kernel.org/show_bug.cgi?id=61621 Since there is no clear benefit of having MSI enabled, just revert change to fix the problem. Cc: stable@vger.kernel.org # 3.11+ Signed-off-by: Stanislaw Gruszka Acked-by: Jakub Kicinski Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00pci.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00pci.c b/drivers/net/wireless/rt2x00/rt2x00pci.c index 76d95deb274b..dc49e525ae5e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00pci.c +++ b/drivers/net/wireless/rt2x00/rt2x00pci.c @@ -105,13 +105,11 @@ int rt2x00pci_probe(struct pci_dev *pci_dev, const struct rt2x00_ops *ops) goto exit_release_regions; } - pci_enable_msi(pci_dev); - hw = ieee80211_alloc_hw(sizeof(struct rt2x00_dev), ops->hw); if (!hw) { rt2x00_probe_err("Failed to allocate hardware\n"); retval = -ENOMEM; - goto exit_disable_msi; + goto exit_release_regions; } pci_set_drvdata(pci_dev, hw); @@ -152,9 +150,6 @@ exit_free_reg: exit_free_device: ieee80211_free_hw(hw); -exit_disable_msi: - pci_disable_msi(pci_dev); - exit_release_regions: pci_release_regions(pci_dev); @@ -179,8 +174,6 @@ void rt2x00pci_remove(struct pci_dev *pci_dev) rt2x00pci_free_reg(rt2x00dev); ieee80211_free_hw(hw); - pci_disable_msi(pci_dev); - /* * Free the PCI device data. */ From 453b0c3f6910672f79da354077af728d92f95c5b Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 27 Sep 2013 10:55:38 -0700 Subject: [PATCH 07/59] mwifiex: fix SDIO interrupt lost issue 601216e "mwifiex: process RX packets in SDIO IRQ thread directly" introduced a command timeout issue which can be reproduced easily on an AM33xx platform using a test application written by Daniel Mack: https://gist.github.com/zonque/6579314 mwifiex_main_process() is called from both the SDIO handler and the workqueue. In case an interrupt occurs right after the int_status check, but before updating the mwifiex_processing flag, this interrupt gets lost, resulting in a command timeout and consequently a card reset. Let main_proc_lock protect both int_status and mwifiex_processing flag. This fixes the interrupt lost issue. Cc: # 3.7+ Reported-by: Sven Neumann Reported-by: Andreas Fenkart Tested-by: Daniel Mack Reviewed-by: Dylan Reid Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Paul Stewart Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index fd778337deee..c2b91f566e05 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -358,10 +358,12 @@ process_start: } } while (true); - if ((adapter->int_status) || IS_CARD_RX_RCVD(adapter)) - goto process_start; - spin_lock_irqsave(&adapter->main_proc_lock, flags); + if ((adapter->int_status) || IS_CARD_RX_RCVD(adapter)) { + spin_unlock_irqrestore(&adapter->main_proc_lock, flags); + goto process_start; + } + adapter->mwifiex_processing = false; spin_unlock_irqrestore(&adapter->main_proc_lock, flags); From f69727fd78fa761dc49ee3091c432a8c6ab81292 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 29 Sep 2013 13:06:31 +0200 Subject: [PATCH 08/59] ath9k: fix powersave response handling for BA session packets When a packet is passed from mac80211 to the driver with the IEEE80211_TX_CTL_PS_RESPONSE flag set, it bypasses the normal driver internal queueing and goes directly to the UAPSD queue. When that happens, packets that are part of a BlockAck session still need to be tracked as such inside the driver, otherwise it will create discrepancies in the receiver BA reorder window, causing traffic stalls. This only happens in AP mode with powersave-enabled clients. This patch fixes the regression introduced in the commit "ath9k: use software queues for un-aggregated data packets" Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/xmit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 5ac713d2ff5d..dd30452df966 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1969,15 +1969,18 @@ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, struct ath_atx_tid *tid, struct sk_buff *skb) { + struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_frame_info *fi = get_frame_info(skb); struct list_head bf_head; - struct ath_buf *bf; - - bf = fi->bf; + struct ath_buf *bf = fi->bf; INIT_LIST_HEAD(&bf_head); list_add_tail(&bf->list, &bf_head); bf->bf_state.bf_type = 0; + if (tid && (tx_info->flags & IEEE80211_TX_CTL_AMPDU)) { + bf->bf_state.bf_type = BUF_AMPDU; + ath_tx_addto_baw(sc, tid, bf); + } bf->bf_next = NULL; bf->bf_lastbf = bf; From 6c519bad7b19a2c14a075b400edabaa630330123 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 27 Sep 2013 18:03:39 +0200 Subject: [PATCH 09/59] batman-adv: set up network coding packet handlers during module init batman-adv saves its table of packet handlers as a global state, so handlers must be set up only once (and setting them up a second time will fail). The recently-added network coding support tries to set up its handler each time a new softif is registered, which obviously fails when more that one softif is used (and in consequence, the softif creation fails). Fix this by splitting up batadv_nc_init into batadv_nc_init (which is called only once) and batadv_nc_mesh_init (which is called for each softif); in addition batadv_nc_free is renamed to batadv_nc_mesh_free to keep naming consistent. Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 5 +++-- net/batman-adv/network-coding.c | 28 ++++++++++++++++++---------- net/batman-adv/network-coding.h | 14 ++++++++++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c72d1bcdcf49..1356af660b5b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -65,6 +65,7 @@ static int __init batadv_init(void) batadv_recv_handler_init(); batadv_iv_init(); + batadv_nc_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -142,7 +143,7 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - ret = batadv_nc_init(bat_priv); + ret = batadv_nc_mesh_init(bat_priv); if (ret < 0) goto err; @@ -167,7 +168,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_vis_quit(bat_priv); batadv_gw_node_purge(bat_priv); - batadv_nc_free(bat_priv); + batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a487d46e0aec..4ecc0b6bf8ab 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -34,6 +34,20 @@ static void batadv_nc_worker(struct work_struct *work); static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +/** + * batadv_nc_init - one-time initialization for network coding + */ +int __init batadv_nc_init(void) +{ + int ret; + + /* Register our packet type */ + ret = batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet); + + return ret; +} + /** * batadv_nc_start_timer - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information @@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_nc_init - initialise coding hash table and start house keeping + * batadv_nc_mesh_init - initialise coding hash table and start house keeping * @bat_priv: the bat priv with all the soft interface information */ -int batadv_nc_init(struct batadv_priv *bat_priv) +int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { bat_priv->nc.timestamp_fwd_flush = jiffies; bat_priv->nc.timestamp_sniffed_purge = jiffies; @@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv) batadv_hash_set_lock_class(bat_priv->nc.coding_hash, &batadv_nc_decoding_hash_lock_class_key); - /* Register our packet type */ - if (batadv_recv_handler_register(BATADV_CODED, - batadv_nc_recv_coded_packet) < 0) - goto err; - INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); batadv_nc_start_timer(bat_priv); @@ -1721,12 +1730,11 @@ free_nc_packet: } /** - * batadv_nc_free - clean up network coding memory + * batadv_nc_mesh_free - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ -void batadv_nc_free(struct batadv_priv *bat_priv) +void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - batadv_recv_handler_unregister(BATADV_CODED); cancel_delayed_work_sync(&bat_priv->nc.work); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 85a4ec81ad50..ddfa618e80bf 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -22,8 +22,9 @@ #ifdef CONFIG_BATMAN_ADV_NC -int batadv_nc_init(struct batadv_priv *bat_priv); -void batadv_nc_free(struct batadv_priv *bat_priv); +int batadv_nc_init(void); +int batadv_nc_mesh_init(struct batadv_priv *bat_priv); +void batadv_nc_mesh_free(struct batadv_priv *bat_priv); void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, @@ -46,12 +47,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_NC */ -static inline int batadv_nc_init(struct batadv_priv *bat_priv) +static inline int batadv_nc_init(void) { return 0; } -static inline void batadv_nc_free(struct batadv_priv *bat_priv) +static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { return; } From e727ca82e0e9616ab4844301e6bae60ca7327682 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:06 +0200 Subject: [PATCH 10/59] proc connector: fix info leaks Initialize event_data for all possible message types to prevent leaking kernel stack contents to userland (up to 20 bytes). Also set the flags member of the connector message to 0 to prevent leaking two more stack bytes this way. Cc: stable@vger.kernel.org # v2.6.15+ Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- drivers/connector/cn_proc.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 08ae128cce9b..c73fc2b74de2 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -65,6 +65,7 @@ void proc_fork_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -80,6 +81,7 @@ void proc_fork_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ /* If cn_netlink_send() failed, the data is not sent */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -96,6 +98,7 @@ void proc_exec_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -106,6 +109,7 @@ void proc_exec_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -122,6 +126,7 @@ void proc_id_connector(struct task_struct *task, int which_id) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->what = which_id; ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; @@ -145,6 +150,7 @@ void proc_id_connector(struct task_struct *task, int which_id) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -160,6 +166,7 @@ void proc_sid_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -170,6 +177,7 @@ void proc_sid_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -185,6 +193,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -203,6 +212,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -218,6 +228,7 @@ void proc_comm_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -229,6 +240,7 @@ void proc_comm_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -244,6 +256,7 @@ void proc_coredump_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -254,6 +267,7 @@ void proc_coredump_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -269,6 +283,7 @@ void proc_exit_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -281,6 +296,7 @@ void proc_exit_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -304,6 +320,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); msg->seq = rcvd_seq; ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -313,6 +330,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = rcvd_ack + 1; msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } From 162b2bedc084d2d908a04c93383ba02348b648b0 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:07 +0200 Subject: [PATCH 11/59] connector: use nlmsg_len() to check message length The current code tests the length of the whole netlink message to be at least as long to fit a cn_msg. This is wrong as nlmsg_len includes the length of the netlink message header. Use nlmsg_len() instead to fix this "off-by-NLMSG_HDRLEN" size check. Cc: stable@vger.kernel.org # v2.6.14+ Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- drivers/connector/connector.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 6ecfa758942c..0daa11e418b1 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -157,17 +157,18 @@ static int cn_call_callback(struct sk_buff *skb) static void cn_rx_skb(struct sk_buff *__skb) { struct nlmsghdr *nlh; - int err; struct sk_buff *skb; + int len, err; skb = skb_get(__skb); if (skb->len >= NLMSG_HDRLEN) { nlh = nlmsg_hdr(skb); + len = nlmsg_len(nlh); - if (nlh->nlmsg_len < sizeof(struct cn_msg) || + if (len < (int)sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || - nlh->nlmsg_len > CONNECTOR_MAX_MSG_SIZE) { + len > CONNECTOR_MAX_MSG_SIZE) { kfree_skb(skb); return; } From ac73bf50b709dea4e39635151b861b8a8f52bfbb Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:08 +0200 Subject: [PATCH 12/59] connector: use 'size' everywhere in cn_netlink_send() We calculated the size for the netlink message buffer as size. Use size in the memcpy() call as well instead of recalculating it. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- drivers/connector/connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 0daa11e418b1..a36749f1e44a 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -109,7 +109,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask) data = nlmsg_data(nlh); - memcpy(data, msg, sizeof(*data) + msg->len); + memcpy(data, msg, size); NETLINK_CB(skb).dst_group = group; From 05742faf2512b94907f49b21581ed4667b9bd702 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:09 +0200 Subject: [PATCH 13/59] connector - documentation: simplify netlink message length assignment Use the precalculated size instead of obfuscating the message length calculation by first subtracting the netlink header length from size and then use the NLMSG_LENGTH() macro to add it back again. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- Documentation/connector/ucon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c index 4848db8c71ff..8a4da64e02a8 100644 --- a/Documentation/connector/ucon.c +++ b/Documentation/connector/ucon.c @@ -71,7 +71,7 @@ static int netlink_send(int s, struct cn_msg *msg) nlh->nlmsg_seq = seq++; nlh->nlmsg_pid = getpid(); nlh->nlmsg_type = NLMSG_DONE; - nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh->nlmsg_len = size; nlh->nlmsg_flags = 0; m = NLMSG_DATA(nlh); From 6865d1e834be84ddd5808d93d5035b492346c64a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:05:40 +0200 Subject: [PATCH 14/59] unix_diag: fix info leak When filling the netlink message we miss to wipe the pad field, therefore leak one byte of heap memory to userland. Fix this by setting pad to 0. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/unix/diag.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/diag.c b/net/unix/diag.c index d591091603bf..86fa0f3b2caf 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; + rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); From 8c1c58ec70ce45cd5c7866811e90df131e7a3005 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Sep 2013 15:12:15 -0500 Subject: [PATCH 15/59] net: calxedaxgmac: fix clearing of old filter addresses In commit 2ee68f621af280 (net: calxedaxgmac: fix various errors in xgmac_set_rx_mode), a fix to clean-up old address entries was added. However, the loop to zero out the entries failed to increment the register address resulting in only 1 entry getting cleared. Fix this to correctly use the loop index. Also, the end of the loop condition was off by 1 and should have been <= rather than <. Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/net/ethernet/calxeda/xgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 78d6d6b970e1..94358d234e73 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1342,8 +1342,8 @@ static void xgmac_set_rx_mode(struct net_device *dev) } out: - for (i = reg; i < XGMAC_MAX_FILTER_ADDR; i++) - xgmac_set_mac_addr(ioaddr, NULL, reg); + for (i = reg; i <= XGMAC_MAX_FILTER_ADDR; i++) + xgmac_set_mac_addr(ioaddr, NULL, i); for (i = 0; i < XGMAC_NUM_HASH; i++) writel(hash_filter[i], ioaddr + XGMAC_HASH(i)); From 8a8c3f5bebe2455200cd488aea8546d01645b06b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Sep 2013 15:12:16 -0500 Subject: [PATCH 16/59] net: calxedaxgmac: add uc and mc filter addresses in promiscuous mode Even in promiscuous mode, we need to add filter addresses for correct operation. This fixes silent failures when using a bridge and adding addresses using the "bridge fdb add" command. Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/net/ethernet/calxeda/xgmac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 94358d234e73..35da09b80c7c 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1291,10 +1291,8 @@ static void xgmac_set_rx_mode(struct net_device *dev) netdev_dbg(priv->dev, "# mcasts %d, # unicast %d\n", netdev_mc_count(dev), netdev_uc_count(dev)); - if (dev->flags & IFF_PROMISC) { - writel(XGMAC_FRAME_FILTER_PR, ioaddr + XGMAC_FRAME_FILTER); - return; - } + if (dev->flags & IFF_PROMISC) + value |= XGMAC_FRAME_FILTER_PR; memset(hash_filter, 0, sizeof(hash_filter)); From 0cf2f380073867e5d0cfb526fdf9bee0e7f8a9c6 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Sep 2013 15:12:17 -0500 Subject: [PATCH 17/59] net: calxedaxgmac: determine number of address filters at runtime Highbank and Midway xgmac h/w have different number of MAC address filter registers with 7 and 31, respectively. Highbank has been wrong, so fix it and detect the number of filter registers at run-time. Unfortunately, the version register is the same on both SOCs, so simply test if write to the last filter register will take a value. It always reads as 0 if not. Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/net/ethernet/calxeda/xgmac.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 35da09b80c7c..48f52882a22b 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -106,7 +106,6 @@ #define XGMAC_DMA_HW_FEATURE 0x00000f58 /* Enabled Hardware Features */ #define XGMAC_ADDR_AE 0x80000000 -#define XGMAC_MAX_FILTER_ADDR 31 /* PMT Control and Status */ #define XGMAC_PMT_POINTER_RESET 0x80000000 @@ -384,6 +383,7 @@ struct xgmac_priv { struct device *device; struct napi_struct napi; + int max_macs; struct xgmac_extra_stats xstats; spinlock_t stats_lock; @@ -1296,7 +1296,7 @@ static void xgmac_set_rx_mode(struct net_device *dev) memset(hash_filter, 0, sizeof(hash_filter)); - if (netdev_uc_count(dev) > XGMAC_MAX_FILTER_ADDR) { + if (netdev_uc_count(dev) > priv->max_macs) { use_hash = true; value |= XGMAC_FRAME_FILTER_HUC | XGMAC_FRAME_FILTER_HPF; } @@ -1319,7 +1319,7 @@ static void xgmac_set_rx_mode(struct net_device *dev) goto out; } - if ((netdev_mc_count(dev) + reg - 1) > XGMAC_MAX_FILTER_ADDR) { + if ((netdev_mc_count(dev) + reg - 1) > priv->max_macs) { use_hash = true; value |= XGMAC_FRAME_FILTER_HMC | XGMAC_FRAME_FILTER_HPF; } else { @@ -1340,7 +1340,7 @@ static void xgmac_set_rx_mode(struct net_device *dev) } out: - for (i = reg; i <= XGMAC_MAX_FILTER_ADDR; i++) + for (i = reg; i <= priv->max_macs; i++) xgmac_set_mac_addr(ioaddr, NULL, i); for (i = 0; i < XGMAC_NUM_HASH; i++) writel(hash_filter[i], ioaddr + XGMAC_HASH(i)); @@ -1759,6 +1759,13 @@ static int xgmac_probe(struct platform_device *pdev) uid = readl(priv->base + XGMAC_VERSION); netdev_info(ndev, "h/w version is 0x%x\n", uid); + /* Figure out how many valid mac address filter registers we have */ + writel(1, priv->base + XGMAC_ADDR_HIGH(31)); + if (readl(priv->base + XGMAC_ADDR_HIGH(31)) == 1) + priv->max_macs = 31; + else + priv->max_macs = 7; + writel(0, priv->base + XGMAC_DMA_INTR_ENA); ndev->irq = platform_get_irq(pdev, 0); if (ndev->irq == -ENXIO) { From 58e4e1f6cacddb7823c44bcfb272174553f6c645 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 30 Sep 2013 13:29:08 -0700 Subject: [PATCH 18/59] igb: Avoid uninitialized advertised variable in eee_set_cur eee_get_cur assumes that the output data is already zeroed. It can read-modify-write the advertised field: if (ipcnfg & E1000_IPCNFG_EEE_100M_AN) 2594 edata->advertised |= ADVERTISED_100baseT_Full; This is ok for the normal ethtool eee_get call, which always zeroes the input data before. But eee_set_cur also calls eee_get_cur and it did not zero the input field. Later on it then compares agsinst the field, which can contain partial stack garbage. Zero the input field in eee_set_cur() too. Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Signed-off-by: Andi Kleen Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 86d51429a189..151e00cad113 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2655,6 +2655,8 @@ static int igb_set_eee(struct net_device *netdev, (hw->phy.media_type != e1000_media_type_copper)) return -EOPNOTSUPP; + memset(&eee_curr, 0, sizeof(struct ethtool_eee)); + ret_val = igb_get_eee(netdev, &eee_curr); if (ret_val) return ret_val; From 5843ef421311c34f06d5ab82bced5aebfe185b2c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 30 Sep 2013 13:29:11 -0700 Subject: [PATCH 19/59] tcp: Always set options to 0 before calling tcp_established_options tcp_established_options assumes opts->options is 0 before calling, as it read modify writes it. For the tcp_current_mss() case the opts structure is not zeroed, so this can be done with uninitialized values. This is ok, because ->options is not read in this path. But it's still better to avoid the operation on the uninitialized field. This shuts up a static code analyzer, and presumably may help the optimizer. Cc: netdev@vger.kernel.org Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e6bb8256e59f..c6f01f2cdb32 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb unsigned int size = 0; unsigned int eff_sacks; + opts->options = 0; + #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { From 5bc3db5c9ca8407f52918b6504d3b27230defedc Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Sep 2013 21:30:22 -0700 Subject: [PATCH 20/59] tc: export tc_defact.h to userspace Jamal sent patch to add tc user simple actions to iproute2 but required header was not being exported. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/Kbuild | 1 + include/{ => uapi}/linux/tc_act/tc_defact.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) rename include/{ => uapi}/linux/tc_act/tc_defact.h (75%) diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 0623ec4e728f..56f121605c99 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list header-y += tc_csum.h +header-y += tc_defact.h header-y += tc_gact.h header-y += tc_ipt.h header-y += tc_mirred.h diff --git a/include/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h similarity index 75% rename from include/linux/tc_act/tc_defact.h rename to include/uapi/linux/tc_act/tc_defact.h index 6f65d07c7ce2..17dddb40f740 100644 --- a/include/linux/tc_act/tc_defact.h +++ b/include/uapi/linux/tc_act/tc_defact.h @@ -6,7 +6,7 @@ struct tc_defact { tc_gen; }; - + enum { TCA_DEF_UNSPEC, TCA_DEF_TM, From 28ad7b06f4a7670d5794d3751f5c68a7e5e437a8 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 1 Oct 2013 21:23:25 -0400 Subject: [PATCH 21/59] bonding: update MAINTAINERS Veaceslav has been doing a significant amount of work on bonding lately and reached out to me about being a maintainer. After discussing this with him, I think he would be a good fit as a bonding maintainer. Signed-off-by: Andy Gospodarek Acked-by: Veaceslav Falico Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 744a23954a34..43dfc821c584 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1785,6 +1785,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh +M: Veaceslav Falico M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ From 80ad1d61e72d626e30ebe8529a0455e660ca4693 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Oct 2013 21:04:11 -0700 Subject: [PATCH 22/59] net: do not call sock_put() on TIMEWAIT sockets commit 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls") incorrectly used sock_put() on TIMEWAIT sockets. We should instead use inet_twsk_put() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 +- net/ipv6/inet6_hashtables.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 7bd8983dbfcf..96da9c77deca 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -287,7 +287,7 @@ begintw: if (unlikely(!INET_TW_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 32b4a1675d82..066640e0ba8e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -116,7 +116,7 @@ begintw: } if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; From e18503f41f9b12132c95d7c31ca6ee5155e44e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Cachereul?= Date: Wed, 2 Oct 2013 10:16:02 +0200 Subject: [PATCH 23/59] l2tp: fix kernel panic when using IPv4-mapped IPv6 addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPv4 mapped addresses cause kernel panic. The patch juste check whether the IPv6 address is an IPv4 mapped address. If so, use IPv4 API instead of IPv6. [ 940.026915] general protection fault: 0000 [#1] [ 940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse [ 940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1 [ 940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000 [ 940.026915] RIP: 0010:[] [] ip6_xmit+0x276/0x326 [ 940.026915] RSP: 0018:ffff88000737fd28 EFLAGS: 00010286 [ 940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000 [ 940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40 [ 940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90 [ 940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0 [ 940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580 [ 940.026915] FS: 00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000 [ 940.026915] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0 [ 940.026915] Stack: [ 940.026915] ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020 [ 940.026915] 11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800 [ 940.026915] ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020 [ 940.026915] Call Trace: [ 940.026915] [] ? inet6_csk_xmit+0xa4/0xc4 [ 940.026915] [] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core] [ 940.026915] [] ? pskb_expand_head+0x161/0x214 [ 940.026915] [] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp] [ 940.026915] [] ? ppp_channel_push+0x36/0x8b [ppp_generic] [ 940.026915] [] ? ppp_write+0xaf/0xc5 [ppp_generic] [ 940.026915] [] ? vfs_write+0xa2/0x106 [ 940.026915] [] ? SyS_write+0x56/0x8a [ 940.026915] [] ? system_call_fastpath+0x16/0x1b [ 940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49 8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02 00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51 [ 940.026915] RIP [] ip6_xmit+0x276/0x326 [ 940.026915] RSP [ 940.057945] ---[ end trace be8aba9a61c8b7f3 ]--- [ 940.058583] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: François CACHEREUL Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 27 +++++++++++++++++++++++---- net/l2tp/l2tp_core.h | 3 +++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index feae495a0a30..aedaa2cd4237 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -496,6 +496,7 @@ out: static inline int l2tp_verify_udp_checksum(struct sock *sk, struct sk_buff *skb) { + struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; struct udphdr *uh = udp_hdr(skb); u16 ulen = ntohs(uh->len); __wsum psum; @@ -504,7 +505,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1128,7 +1129,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6) + if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(skb, NULL); else #endif @@ -1255,7 +1256,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) l2tp_xmit_ipv6_csum(sk, skb, udp_len); else #endif @@ -1704,6 +1705,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (ipv6_addr_v4mapped(&np->saddr) && + ipv6_addr_v4mapped(&np->daddr)) { + struct inet_sock *inet = inet_sk(sk); + + tunnel->v4mapped = true; + inet->inet_saddr = np->saddr.s6_addr32[3]; + inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; + inet->inet_daddr = np->daddr.s6_addr32[3]; + } else { + tunnel->v4mapped = false; + } + } +#endif + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { @@ -1712,7 +1731,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) udpv6_encap_enable(); else #endif diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 66a559b104b6..6f251cbc2ed7 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -194,6 +194,9 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ +#if IS_ENABLED(CONFIG_IPV6) + bool v4mapped; +#endif struct work_struct del_work; From 041b4ddb84989f06ff1df0ca869b950f1ee3cb1c Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 2 Oct 2013 12:57:20 +0200 Subject: [PATCH 24/59] net: mv643xx_eth: update statistics timer from timer context only Each port driver installs a periodic timer to update port statistics by calling mib_counters_update. As mib_counters_update is also called from non-timer context, we should not reschedule the timer there but rather move it to timer-only context. Signed-off-by: Sebastian Hesselbarth Acked-by: Jason Cooper Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 7fb5677451f9..44a87e4469e0 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1131,15 +1131,13 @@ static void mib_counters_update(struct mv643xx_eth_private *mp) p->rx_discard += rdlp(mp, RX_DISCARD_FRAME_CNT); p->rx_overrun += rdlp(mp, RX_OVERRUN_FRAME_CNT); spin_unlock_bh(&mp->mib_counters_lock); - - mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); } static void mib_counters_timer_wrapper(unsigned long _mp) { struct mv643xx_eth_private *mp = (void *)_mp; - mib_counters_update(mp); + mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); } From f564412c935111c583b787bcc18157377b208e2e Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 2 Oct 2013 12:57:21 +0200 Subject: [PATCH 25/59] net: mv643xx_eth: fix orphaned statistics timer crash The periodic statistics timer gets started at port _probe() time, but is stopped on _stop() only. In a modular environment, this can cause the timer to access already deallocated memory, if the module is unloaded without starting the eth device. To fix this, we add the timer right before the port is started, instead of at _probe() time. Signed-off-by: Sebastian Hesselbarth Acked-by: Jason Cooper Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 44a87e4469e0..2364707d6a13 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2235,6 +2235,7 @@ static int mv643xx_eth_open(struct net_device *dev) mp->int_mask |= INT_TX_END_0 << i; } + add_timer(&mp->mib_counters_timer); port_start(mp); wrlp(mp, INT_MASK_EXT, INT_EXT_LINK_PHY | INT_EXT_TX); @@ -2914,7 +2915,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mp->mib_counters_timer.data = (unsigned long)mp; mp->mib_counters_timer.function = mib_counters_timer_wrapper; mp->mib_counters_timer.expires = jiffies + 30 * HZ; - add_timer(&mp->mib_counters_timer); spin_lock_init(&mp->mib_counters_lock); From b5d82db83c1d0b327b599508a0830d25cc1f15db Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 2 Oct 2013 12:57:22 +0200 Subject: [PATCH 26/59] net: mv643xx_eth: fix missing device_node for port devices DT-based mv643xx_eth probes and creates platform_devices for the port devices on its own. To allow fixups for ports based on the device_node, we need to set .of_node of the corresponding device with the correct node. Signed-off-by: Sebastian Hesselbarth Acked-by: Jason Cooper Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 2364707d6a13..2c210ec35d59 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2533,6 +2533,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, if (!ppdev) return -ENOMEM; ppdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + ppdev->dev.of_node = pnp; ret = platform_device_add_resources(ppdev, &res, 1); if (ret) From 1661bf364ae9c506bc8795fef70d1532931be1e8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Oct 2013 00:27:20 +0300 Subject: [PATCH 27/59] net: heap overflow in __audit_sockaddr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to cap ->msg_namelen or it leads to a buffer overflow when we to the memcpy() in __audit_sockaddr(). It requires CAP_AUDIT_CONTROL to exploit this bug. The call tree is: ___sys_recvmsg() move_addr_to_user() audit_sockaddr() __audit_sockaddr() Reported-by: Jüri Aedla Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/compat.c | 2 ++ net/socket.c | 24 ++++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/net/compat.c b/net/compat.c index f0a1ba6c8086..89032580bd1d 100644 --- a/net/compat.c +++ b/net/compat.c @@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index ebed4b68f768..c226aceee65b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1964,6 +1964,16 @@ struct used_address { unsigned int name_len; }; +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct msghdr __user *umsg) +{ + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; + return 0; +} + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) @@ -1982,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2191,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; From 5e8a402f831dbe7ee831340a91439e46f0d38acd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Oct 2013 10:31:41 -0700 Subject: [PATCH 28/59] tcp: do not forget FIN in tcp_shifted_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yuchung found following problem : There are bugs in the SACK processing code, merging part in tcp_shift_skb_data(), that incorrectly resets or ignores the sacked skbs FIN flag. When a receiver first SACK the FIN sequence, and later throw away ofo queue (e.g., sack-reneging), the sender will stop retransmitting the FIN flag, and hangs forever. Following packetdrill test can be used to reproduce the bug. $ cat sack-merge-bug.pkt `sysctl -q net.ipv4.tcp_fack=0` // Establish a connection and send 10 MSS. 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +.000 bind(3, ..., ...) = 0 +.000 listen(3, 1) = 0 +.050 < S 0:0(0) win 32792 +.000 > S. 0:0(0) ack 1 +.001 < . 1:1(0) ack 1 win 1024 +.000 accept(3, ..., ...) = 4 +.100 write(4, ..., 12000) = 12000 +.000 shutdown(4, SHUT_WR) = 0 +.000 > . 1:10001(10000) ack 1 +.050 < . 1:1(0) ack 2001 win 257 +.000 > FP. 10001:12001(2000) ack 1 +.050 < . 1:1(0) ack 2001 win 257 +.050 < . 1:1(0) ack 2001 win 257 // SACK reneg +.050 < . 1:1(0) ack 12001 win 257 +0 %{ print "unacked: ",tcpi_unacked }% +5 %{ print "" }% First, a typo inverted left/right of one OR operation, then code forgot to advance end_seq if the merged skb carried FIN. Bug was added in 2.6.29 by commit 832d11c5cd076ab ("tcp: Try to restore large SKBs while SACK processing") Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Cc: Ilpo Järvinen Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 25a89eaa669d..113dc5f17d47 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1284,7 +1284,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; + TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + TCP_SKB_CB(prev)->end_seq++; + if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); From d546a89362dcc35bc1aacf0e8e3b0da5a7d2dc89 Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Fri, 27 Sep 2013 18:26:30 +0100 Subject: [PATCH 29/59] sfc: Add rmb() between reading stats and generation count to ensure consistency Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ef10.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9f18ae984f9e..a4fbb383ac63 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -545,6 +545,7 @@ static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) rmb(); efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask, stats, efx->stats_buffer.addr, false); + rmb(); generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; if (generation_end != generation_start) return -EAGAIN; From 87648cc925222f16c3bd119af8ae8e8fb2f3fe1d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 25 Sep 2013 17:34:12 +0100 Subject: [PATCH 30/59] sfc: Fix internal indices of ethtool stats for EF10 The indices in nic_data->stats need to match the EF10_STAT_whatever enum values. In efx_nic_update_stats, only mask; gaps are removed in efx_ef10_update_stats. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/nic.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index e7dbd2dd202e..9826594c8a48 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -469,8 +469,7 @@ size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count, * @count: Length of the @desc array * @mask: Bitmask of which elements of @desc are enabled * @stats: Buffer to update with the converted statistics. The length - * of this array must be at least the number of set bits in the - * first @count bits of @mask. + * of this array must be at least @count. * @dma_buf: DMA buffer containing hardware statistics * @accumulate: If set, the converted values will be added rather than * directly stored to the corresponding elements of @stats @@ -503,11 +502,9 @@ void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count, } if (accumulate) - *stats += val; + stats[index] += val; else - *stats = val; + stats[index] = val; } - - ++stats; } } From 4bae913bd372aab90fdbd91e7e5fb841a2a0dc15 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 27 Sep 2013 18:52:49 +0100 Subject: [PATCH 31/59] sfc: Refactor EF10 stat mask code to allow for more conditional stats Previously, efx_ef10_stat_mask returned a static const unsigned long[], which meant that each possible mask had to be declared statically with STAT_MASK_BITMAP. Since adding a condition would double the size of the decision tree, we now create the bitmask dynamically. To do this, we have two functions efx_ef10_raw_stat_mask, which returns a u64, and efx_ef10_get_stat_mask, which fills in an unsigned long * argument. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ef10.c | 49 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index a4fbb383ac63..c00a5d69c94a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -498,44 +498,49 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { #define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \ (1ULL << EF10_STAT_rx_length_error)) -#if BITS_PER_LONG == 64 -#define STAT_MASK_BITMAP(bits) (bits) -#else -#define STAT_MASK_BITMAP(bits) (bits) & 0xffffffff, (bits) >> 32 -#endif - -static const unsigned long *efx_ef10_stat_mask(struct efx_nic *efx) +static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx) { - static const unsigned long hunt_40g_stat_mask[] = { - STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | - HUNT_40G_EXTRA_STAT_MASK) - }; - static const unsigned long hunt_10g_only_stat_mask[] = { - STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | - HUNT_10G_ONLY_STAT_MASK) - }; + u64 raw_mask = HUNT_COMMON_STAT_MASK; u32 port_caps = efx_mcdi_phy_get_caps(efx); if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - return hunt_40g_stat_mask; + raw_mask |= HUNT_40G_EXTRA_STAT_MASK; else - return hunt_10g_only_stat_mask; + raw_mask |= HUNT_10G_ONLY_STAT_MASK; + return raw_mask; +} + +static void efx_ef10_get_stat_mask(struct efx_nic *efx, unsigned long *mask) +{ + u64 raw_mask = efx_ef10_raw_stat_mask(efx); + +#if BITS_PER_LONG == 64 + mask[0] = raw_mask; +#else + mask[0] = raw_mask & 0xffffffff; + mask[1] = raw_mask >> 32; +#endif } static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names) { + DECLARE_BITMAP(mask, EF10_STAT_COUNT); + + efx_ef10_get_stat_mask(efx, mask); return efx_nic_describe_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, - efx_ef10_stat_mask(efx), names); + mask, names); } static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - const unsigned long *stats_mask = efx_ef10_stat_mask(efx); + DECLARE_BITMAP(mask, EF10_STAT_COUNT); __le64 generation_start, generation_end; u64 *stats = nic_data->stats; __le64 *dma_stats; + efx_ef10_get_stat_mask(efx, mask); + dma_stats = efx->stats_buffer.addr; nic_data = efx->nic_data; @@ -543,7 +548,7 @@ static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) if (generation_end == EFX_MC_STATS_GENERATION_INVALID) return 0; rmb(); - efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask, + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, mask, stats, efx->stats_buffer.addr, false); rmb(); generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; @@ -564,12 +569,14 @@ static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) static size_t efx_ef10_update_stats(struct efx_nic *efx, u64 *full_stats, struct rtnl_link_stats64 *core_stats) { - const unsigned long *mask = efx_ef10_stat_mask(efx); + DECLARE_BITMAP(mask, EF10_STAT_COUNT); struct efx_ef10_nic_data *nic_data = efx->nic_data; u64 *stats = nic_data->stats; size_t stats_count = 0, index; int retry; + efx_ef10_get_stat_mask(efx, mask); + /* If we're unlucky enough to read statistics during the DMA, wait * up to 10ms for it to finish (typically takes <500us) */ From 2ca10a75d897c6cd3cfcf87567551d775c9b10bb Mon Sep 17 00:00:00 2001 From: Matthew Slattery Date: Tue, 10 Sep 2013 19:06:27 +0100 Subject: [PATCH 32/59] sfc: Add definitions for new stats counters and capability flag Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/mcdi_pcol.h | 56 +++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index b5cf62492f8e..e0a63ddb7a6c 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -2574,8 +2574,58 @@ #define MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */ #define MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */ #define MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */ -#define MC_CMD_GMAC_DMABUF_START 0x40 /* enum */ -#define MC_CMD_GMAC_DMABUF_END 0x5f /* enum */ +/* enum: PM trunc_bb_overflow counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c +/* enum: PM discard_bb_overflow counter. Valid for EF10 with + * PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d +/* enum: PM trunc_vfifo_full counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e +/* enum: PM discard_vfifo_full counter. Valid for EF10 with + * PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f +/* enum: PM trunc_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_TRUNC_QBB 0x40 +/* enum: PM discard_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_QBB 0x41 +/* enum: PM discard_mapping counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_MAPPING 0x42 +/* enum: RXDP counter: Number of packets dropped due to the queue being + * disabled. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43 +/* enum: RXDP counter: Number of packets dropped by the DICPU. Valid for EF10 + * with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45 +/* enum: RXDP counter: Number of non-host packets. Valid for EF10 with + * PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46 +/* enum: RXDP counter: Number of times an emergency descriptor fetch was + * performed. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_EMERGENCY_FETCH_CONDITIONS 0x47 +/* enum: RXDP counter: Number of times the DPCPU waited for an existing + * descriptor fetch. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_EMERGENCY_WAIT_CONDITIONS 0x48 +/* enum: Start of GMAC stats buffer space, for Siena only. */ +#define MC_CMD_GMAC_DMABUF_START 0x40 +/* enum: End of GMAC stats buffer space, for Siena only. */ +#define MC_CMD_GMAC_DMABUF_END 0x5f #define MC_CMD_MAC_GENERATION_END 0x60 /* enum */ #define MC_CMD_MAC_NSTATS 0x61 /* enum */ @@ -5065,6 +5115,8 @@ #define MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_WIDTH 1 #define MC_CMD_GET_CAPABILITIES_OUT_MCAST_FILTER_CHAINING_LBN 26 #define MC_CMD_GET_CAPABILITIES_OUT_MCAST_FILTER_CHAINING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN 27 +#define MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1 /* RxDPCPU firmware id. */ #define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_OFST 4 #define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_LEN 2 From 568d7a001bd5612a280e1026cce7e6e2e16b7687 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 25 Sep 2013 17:32:09 +0100 Subject: [PATCH 33/59] sfc: Add PM and RXDP drop counters to ethtool stats Recognise the new Packet Memory and RX Data Path counters. The following counters are added: rx_pm_{trunc,discard}_bb_overflow - burst buffer overflowed. This should not occur if BB correctly configured. rx_pm_{trunc,discard}_vfifo_full - not enough space in packet memory. May indicate RX performance problems. rx_pm_{trunc,discard}_qbb - dropped by 802.1Qbb early discard mechanism. Since Qbb is not supported at present, this should not occur. rx_pm_discard_mapping - 802.1p priority configured to be dropped. This should not occur in normal operation. rx_dp_q_disabled_packets - packet was to be delivered to a queue but queue is disabled. May indicate misconfiguration by the driver. rx_dp_di_dropped_packets - parser-dispatcher indicated that a packet should be dropped. rx_dp_streaming_packets - packet was sent to the RXDP streaming bus, ie. a filter directed the packet to the MCPU. rx_dp_emerg_{fetch,wait} - RX datapath had to wait for descriptors to be loaded. Indicates performance problems but not drops. These are only provided if the MC firmware has the PM_AND_RXDP_COUNTERS capability. Otherwise, mask them out. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/ef10.c | 35 +++++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/nic.h | 12 +++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index c00a5d69c94a..21f9ad6392e9 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -444,6 +444,18 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { EF10_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS), EF10_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS), EF10_DMA_STAT(rx_nodesc_drops, RX_NODESC_DROPS), + EF10_DMA_STAT(rx_pm_trunc_bb_overflow, PM_TRUNC_BB_OVERFLOW), + EF10_DMA_STAT(rx_pm_discard_bb_overflow, PM_DISCARD_BB_OVERFLOW), + EF10_DMA_STAT(rx_pm_trunc_vfifo_full, PM_TRUNC_VFIFO_FULL), + EF10_DMA_STAT(rx_pm_discard_vfifo_full, PM_DISCARD_VFIFO_FULL), + EF10_DMA_STAT(rx_pm_trunc_qbb, PM_TRUNC_QBB), + EF10_DMA_STAT(rx_pm_discard_qbb, PM_DISCARD_QBB), + EF10_DMA_STAT(rx_pm_discard_mapping, PM_DISCARD_MAPPING), + EF10_DMA_STAT(rx_dp_q_disabled_packets, RXDP_Q_DISABLED_PKTS), + EF10_DMA_STAT(rx_dp_di_dropped_packets, RXDP_DI_DROPPED_PKTS), + EF10_DMA_STAT(rx_dp_streaming_packets, RXDP_STREAMING_PKTS), + EF10_DMA_STAT(rx_dp_emerg_fetch, RXDP_EMERGENCY_FETCH_CONDITIONS), + EF10_DMA_STAT(rx_dp_emerg_wait, RXDP_EMERGENCY_WAIT_CONDITIONS), }; #define HUNT_COMMON_STAT_MASK ((1ULL << EF10_STAT_tx_bytes) | \ @@ -498,15 +510,38 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { #define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \ (1ULL << EF10_STAT_rx_length_error)) +/* These statistics are only provided if the firmware supports the + * capability PM_AND_RXDP_COUNTERS. + */ +#define HUNT_PM_AND_RXDP_STAT_MASK ( \ + (1ULL << EF10_STAT_rx_pm_trunc_bb_overflow) | \ + (1ULL << EF10_STAT_rx_pm_discard_bb_overflow) | \ + (1ULL << EF10_STAT_rx_pm_trunc_vfifo_full) | \ + (1ULL << EF10_STAT_rx_pm_discard_vfifo_full) | \ + (1ULL << EF10_STAT_rx_pm_trunc_qbb) | \ + (1ULL << EF10_STAT_rx_pm_discard_qbb) | \ + (1ULL << EF10_STAT_rx_pm_discard_mapping) | \ + (1ULL << EF10_STAT_rx_dp_q_disabled_packets) | \ + (1ULL << EF10_STAT_rx_dp_di_dropped_packets) | \ + (1ULL << EF10_STAT_rx_dp_streaming_packets) | \ + (1ULL << EF10_STAT_rx_dp_emerg_fetch) | \ + (1ULL << EF10_STAT_rx_dp_emerg_wait)) + static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx) { u64 raw_mask = HUNT_COMMON_STAT_MASK; u32 port_caps = efx_mcdi_phy_get_caps(efx); + struct efx_ef10_nic_data *nic_data = efx->nic_data; if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) raw_mask |= HUNT_40G_EXTRA_STAT_MASK; else raw_mask |= HUNT_10G_ONLY_STAT_MASK; + + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN)) + raw_mask |= HUNT_PM_AND_RXDP_STAT_MASK; + return raw_mask; } diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index fda29d39032f..890bbbe8320e 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -386,6 +386,18 @@ enum { EF10_STAT_rx_align_error, EF10_STAT_rx_length_error, EF10_STAT_rx_nodesc_drops, + EF10_STAT_rx_pm_trunc_bb_overflow, + EF10_STAT_rx_pm_discard_bb_overflow, + EF10_STAT_rx_pm_trunc_vfifo_full, + EF10_STAT_rx_pm_discard_vfifo_full, + EF10_STAT_rx_pm_trunc_qbb, + EF10_STAT_rx_pm_discard_qbb, + EF10_STAT_rx_pm_discard_mapping, + EF10_STAT_rx_dp_q_disabled_packets, + EF10_STAT_rx_dp_di_dropped_packets, + EF10_STAT_rx_dp_streaming_packets, + EF10_STAT_rx_dp_emerg_fetch, + EF10_STAT_rx_dp_emerg_wait, EF10_STAT_COUNT }; From 3573540cafa4296dd60f8be02f2aecaa31047525 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 2 Oct 2013 09:14:06 +0300 Subject: [PATCH 34/59] netif_set_xps_queue: make cpu mask const virtio wants to pass in cpumask_of(cpu), make parameter const to avoid build warnings. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++-- net/core/dev.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3de49aca4519..25f5d2d11e7c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2264,11 +2264,12 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) } #ifdef CONFIG_XPS -extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, +extern int netif_set_xps_queue(struct net_device *dev, + const struct cpumask *mask, u16 index); #else static inline int netif_set_xps_queue(struct net_device *dev, - struct cpumask *mask, + const struct cpumask *mask, u16 index) { return 0; diff --git a/net/core/dev.c b/net/core/dev.c index 65f829cfd928..3430b1ed12e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map, return new_map; } -int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, + u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; struct xps_map *map, *new_map; From 582442d6d5bc74c1e11a2515f9387d3d227278e2 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Thu, 3 Oct 2013 14:49:26 +0100 Subject: [PATCH 35/59] ipv6: Allow the MTU of ipip6 tunnel to be set below 1280 The (inner) MTU of a ipip6 (IPv4-in-IPv6) tunnel cannot be set below 1280, which is the minimum MTU in IPv6. However, there should be no IPv6 on the tunnel interface at all, so the IPv6 rules should not apply. More info at https://bugzilla.kernel.org/show_bug.cgi?id=15530 This patch allows to check the minimum MTU for ipv6 tunnel according to these rules: -In case the tunnel is configured with ipip6 mode the minimum MTU is 68. -In case the tunnel is configured with ip6ip6 or any mode the minimum MTU is 1280. Signed-off-by: Oussama Ghorbel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a791552e0422..583b77e2f69b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU) { - return -EINVAL; + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPIP) { + if (new_mtu < 68) + return -EINVAL; + } else { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; dev->mtu = new_mtu; return 0; } From ecb1c9cc215cb5a4390b714d8b09de637f54fa3f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 7 Oct 2013 20:10:11 +0100 Subject: [PATCH 36/59] sfc: Only bind to EF10 functions with the LinkCtrl and Trusted flags Although we do not yet enable multiple PFs per port, it is possible that a board will be reconfigured to enable them while the driver has not yet been updated to fully support this. The most obvious problem is that multiple functions may try to set conflicting link settings. But we will also run into trouble if the firmware doesn't consider us fully trusted. So, abort probing unless both the LinkCtrl and Trusted flags are set for this function. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/mcdi.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index c082562dbf4e..366c8e3e3784 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -963,7 +963,7 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, bool *was_attached) { MCDI_DECLARE_BUF(inbuf, MC_CMD_DRV_ATTACH_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_EXT_OUT_LEN); size_t outlen; int rc; @@ -981,6 +981,22 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, goto fail; } + /* We currently assume we have control of the external link + * and are completely trusted by firmware. Abort probing + * if that's not true for this function. + */ + if (driver_operating && + outlen >= MC_CMD_DRV_ATTACH_EXT_OUT_LEN && + (MCDI_DWORD(outbuf, DRV_ATTACH_EXT_OUT_FUNC_FLAGS) & + (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | + 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) != + (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | + 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) { + netif_err(efx, probe, efx->net_dev, + "This driver version only supports one function per port\n"); + return -ENODEV; + } + if (was_attached != NULL) *was_attached = MCDI_DWORD(outbuf, DRV_ATTACH_OUT_OLD_STATE); return 0; From d45ed4a4e33ae103053c0a53d280014e7101bb5c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 4 Oct 2013 00:14:06 -0700 Subject: [PATCH 37/59] net: fix unsafe set_memory_rw from softirq on x86 system with net.core.bpf_jit_enable = 1 sudo tcpdump -i eth1 'tcp port 22' causes the warning: [ 56.766097] Possible unsafe locking scenario: [ 56.766097] [ 56.780146] CPU0 [ 56.786807] ---- [ 56.793188] lock(&(&vb->lock)->rlock); [ 56.799593] [ 56.805889] lock(&(&vb->lock)->rlock); [ 56.812266] [ 56.812266] *** DEADLOCK *** [ 56.812266] [ 56.830670] 1 lock held by ksoftirqd/1/13: [ 56.836838] #0: (rcu_read_lock){.+.+..}, at: [] vm_unmap_aliases+0x8c/0x380 [ 56.849757] [ 56.849757] stack backtrace: [ 56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45 [ 56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 56.882004] ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007 [ 56.895630] ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001 [ 56.909313] ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001 [ 56.923006] Call Trace: [ 56.929532] [] dump_stack+0x55/0x76 [ 56.936067] [] print_usage_bug+0x1f7/0x208 [ 56.942445] [] ? save_stack_trace+0x2f/0x50 [ 56.948932] [] ? check_usage_backwards+0x150/0x150 [ 56.955470] [] mark_lock+0x282/0x2c0 [ 56.961945] [] __lock_acquire+0x45d/0x1d50 [ 56.968474] [] ? __lock_acquire+0x2de/0x1d50 [ 56.975140] [] ? cpumask_next_and+0x55/0x90 [ 56.981942] [] lock_acquire+0x92/0x1d0 [ 56.988745] [] ? vm_unmap_aliases+0x16a/0x380 [ 56.995619] [] _raw_spin_lock+0x41/0x50 [ 57.002493] [] ? vm_unmap_aliases+0x16a/0x380 [ 57.009447] [] vm_unmap_aliases+0x16a/0x380 [ 57.016477] [] ? vm_unmap_aliases+0x8c/0x380 [ 57.023607] [] change_page_attr_set_clr+0xc0/0x460 [ 57.030818] [] ? trace_hardirqs_on+0xd/0x10 [ 57.037896] [] ? kmem_cache_free+0xb0/0x2b0 [ 57.044789] [] ? free_object_rcu+0x93/0xa0 [ 57.051720] [] set_memory_rw+0x2f/0x40 [ 57.058727] [] bpf_jit_free+0x2c/0x40 [ 57.065577] [] sk_filter_release_rcu+0x1a/0x30 [ 57.072338] [] rcu_process_callbacks+0x202/0x7c0 [ 57.078962] [] __do_softirq+0xf7/0x3f0 [ 57.085373] [] run_ksoftirqd+0x35/0x70 cannot reuse jited filter memory, since it's readonly, so use original bpf insns memory to hold work_struct defer kfree of sk_filter until jit completed freeing tested on x86_64 and i386 Signed-off-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 1 + arch/powerpc/net/bpf_jit_comp.c | 1 + arch/s390/net/bpf_jit_comp.c | 4 +++- arch/sparc/net/bpf_jit_comp.c | 1 + arch/x86/net/bpf_jit_comp.c | 18 +++++++++++++----- include/linux/filter.h | 15 +++++++++++---- include/net/sock.h | 6 ++---- net/core/filter.c | 8 ++++---- 8 files changed, 36 insertions(+), 18 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f50d223a0bd3..99b44e0e8d86 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33f8257..2345bdb4d917 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 709239285869..a5df511e27a2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp) struct bpf_binary_header *header = (void *)addr; if (fp->bpf_func == sk_run_filter) - return; + goto free_filter; set_memory_rw(addr, header->pages); module_free(NULL, header); +free_filter: + kfree(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 9c7be59e6f5a..218b6b23c378 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79c216aa0e2b..516593e1ce33 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -772,13 +772,21 @@ out: return; } +static void bpf_jit_free_deferred(struct work_struct *work) +{ + struct sk_filter *fp = container_of(work, struct sk_filter, work); + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; + + set_memory_rw(addr, header->pages); + module_free(NULL, header); + kfree(fp); +} + void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) { - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - set_memory_rw(addr, header->pages); - module_free(NULL, header); + INIT_WORK(&fp->work, bpf_jit_free_deferred); + schedule_work(&fp->work); } } diff --git a/include/linux/filter.h b/include/linux/filter.h index a6ac84871d6d..ff4e40cd45b1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #include #include +#include #include #ifdef CONFIG_COMPAT @@ -25,15 +26,19 @@ struct sk_filter { atomic_t refcnt; unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct sock_filter *filter); - struct rcu_head rcu; - struct sock_filter insns[0]; + union { + struct sock_filter insns[0]; + struct work_struct work; + }; }; -static inline unsigned int sk_filter_len(const struct sk_filter *fp) +static inline unsigned int sk_filter_size(unsigned int proglen) { - return fp->len * sizeof(struct sock_filter) + sizeof(*fp); + return max(sizeof(struct sk_filter), + offsetof(struct sk_filter, insns[proglen])); } extern int sk_filter(struct sock *sk, struct sk_buff *skb); @@ -67,11 +72,13 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, } #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else +#include static inline void bpf_jit_compile(struct sk_filter *fp) { } static inline void bpf_jit_free(struct sk_filter *fp) { + kfree(fp); } #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) #endif diff --git a/include/net/sock.h b/include/net/sock.h index 1d37a8086bed..808cbc2ec6c1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1630,16 +1630,14 @@ static inline void sk_filter_release(struct sk_filter *fp) static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - unsigned int size = sk_filter_len(fp); - - atomic_sub(size, &sk->sk_omem_alloc); + atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); sk_filter_release(fp); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) { atomic_inc(&fp->refcnt); - atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc); + atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); } /* diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29ff266..01b780856db2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); bpf_jit_free(fp); - kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); + fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); @@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + unsigned int sk_fsize = sk_filter_size(fprog->len); int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); + fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; if (copy_from_user(fp->insns, fprog->filter, fsize)) { - sock_kfree_s(sk, fp, fsize+sizeof(*fp)); + sock_kfree_s(sk, fp, sk_fsize); return -EFAULT; } From f468b10e3de6c5f90a14b9c82f1403a2651cef35 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 4 Oct 2013 14:44:39 +0200 Subject: [PATCH 38/59] net: ethernet: cpsw: Search childs for slave nodes The current implementation searches the whole DT for nodes named "slave". This patch changes it to search only child nodes for slaves. Signed-off-by: Markus Pargmann Acked-by: Mugunthan V N Acked-by: Peter Korsgaard Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 79974e31187a..5df50a96a1da 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1782,7 +1782,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (ret) pr_warn("Doesn't have any child node\n"); - for_each_node_by_name(slave_node, "slave") { + for_each_child_of_node(node, slave_node) { struct cpsw_slave_data *slave_data = data->slave_data + i; const void *mac_addr = NULL; u32 phyid; @@ -1791,6 +1791,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, struct device_node *mdio_node; struct platform_device *mdio; + /* This is no slave child node, continue */ + if (strcmp(slave_node->name, "slave")) + continue; + parp = of_get_property(slave_node, "phy_id", &lenp); if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { pr_err("Missing slave[%d] phy_id property\n", i); From 281abd965fedfe7c31927d84e01a5bfa554b5502 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 4 Oct 2013 14:44:40 +0200 Subject: [PATCH 39/59] net/ethernet: cpsw: DT read bool dual_emac Signed-off-by: Markus Pargmann Acked-by: Mugunthan V N Acked-by: Peter Korsgaard Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5df50a96a1da..804846eb5fc2 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1771,8 +1771,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->mac_control = prop; - if (!of_property_read_u32(node, "dual_emac", &prop)) - data->dual_emac = prop; + if (of_property_read_bool(node, "dual_emac")) + data->dual_emac = 1; /* * Populate all the child nodes here... From 0a7e22609067ff524fc7bbd45c6951dd08561667 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 4 Oct 2013 17:04:48 +0200 Subject: [PATCH 40/59] ipv4: fix ineffective source address selection When sending out multicast messages, the source address in inet->mc_addr is ignored and rewritten by an autoselected one. This is caused by a typo in commit 813b3b5db831 ("ipv4: Use caller's on-stack flowi as-is in output route lookups"). Signed-off-by: Jiri Benc Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 727f4365bcdf..6011615e810d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2072,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) RT_SCOPE_LINK); goto make_route; } - if (fl4->saddr) { + if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); From fe119a05f8ca481623a8d02efcc984332e612528 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 5 Oct 2013 21:25:17 +0200 Subject: [PATCH 41/59] can: dev: fix nlmsg size calculation in can_get_size() This patch fixes the calculation of the nlmsg size, by adding the missing nla_total_size(). Signed-off-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- drivers/net/can/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index f9cba4123c66..1870c4731a57 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -705,14 +705,14 @@ static size_t can_get_size(const struct net_device *dev) size_t size; size = nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ - size += sizeof(struct can_ctrlmode); /* IFLA_CAN_CTRLMODE */ + size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ - size += sizeof(struct can_bittiming); /* IFLA_CAN_BITTIMING */ - size += sizeof(struct can_clock); /* IFLA_CAN_CLOCK */ + size += nla_total_size(sizeof(struct can_bittiming)); /* IFLA_CAN_BITTIMING */ + size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ - size += sizeof(struct can_berr_counter); + size += nla_total_size(sizeof(struct can_berr_counter)); if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ - size += sizeof(struct can_bittiming_const); + size += nla_total_size(sizeof(struct can_bittiming_const)); return size; } From 88ba09df236c618b5466d35a0165df0dc865eac5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 5 Oct 2013 13:15:30 -0700 Subject: [PATCH 42/59] net: Update the sysctl permissions handler to test effective uid/gid On Tue, 20 Aug 2013 11:40:04 -0500 Eric Sandeen wrote: > This was brought up in a Red Hat bug (which may be marked private, I'm sorry): > > Bug 987055 - open O_WRONLY succeeds on some root owned files in /proc for process running with unprivileged EUID > > "On RHEL7 some of the files in /proc can be opened for writing by an unprivileged EUID." > > The flaw existed upstream as well last I checked. > > This commit in kernel v3.8 caused the regression: > > commit cff109768b2d9c03095848f4cd4b0754117262aa > Author: Eric W. Biederman > Date: Fri Nov 16 03:03:01 2012 +0000 > > net: Update the per network namespace sysctls to be available to the network namespace owner > > - Allow anyone with CAP_NET_ADMIN rights in the user namespace of the > the netowrk namespace to change sysctls. > - Allow anyone the uid of the user namespace root the same > permissions over the network namespace sysctls as the global root. > - Allow anyone with gid of the user namespace root group the same > permissions over the network namespace sysctl as the global root group. > > Signed-off-by: "Eric W. Biederman" > Signed-off-by: David S. Miller > > because it changed /sys/net's special permission handler to test current_uid, not > current_euid; same for current_gid/current_egid. > > So in this case, root cannot drop privs via set[ug]id, and retains all privs > in this codepath. Modify the code to use current_euid(), and in_egroup_p, as in done in fs/proc/proc_sysctl.c:test_perm() Cc: stable@vger.kernel.org Reviewed-by: Eric Sandeen Reported-by: Eric Sandeen Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/sysctl_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9bc6db04be3e..e7000be321b0 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head, /* Allow network administrator to have same access as root. */ if (ns_capable(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_uid())) { + uid_eq(root_uid, current_euid())) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } /* Allow netns root group to have the same access as the root group */ - if (gid_eq(root_gid, current_gid())) { + if (in_egroup_p(root_gid)) { int mode = (table->mode >> 3) & 7; return (mode << 3) | mode; } From 7adac1ec8198e9f5c802918e58a967b92db4a66f Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:15:18 -0400 Subject: [PATCH 43/59] 6lowpan: Only make 6lowpan links to IEEE802154 devices Refuse to create 6lowpan links if the actual hardware interface is of any type other than ARPHRD_IEEE802154. Signed-off-by: Alan Ott Suggested-by: Alexander Aring Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index c85e71e0c7ff..8f56b2b63bfb 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) + return -EINVAL; lowpan_dev_info(dev)->real_dev = real_dev; lowpan_dev_info(dev)->fragment_tag = 0; From ab2d95df9c19f45805977be1c869ba3df9a10835 Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:15:19 -0400 Subject: [PATCH 44/59] 6lowpan: Sync default hardware address of lowpan links to their wpan When a lowpan link to a wpan device is created, set the hardware address of the lowpan link to that of the wpan device. Signed-off-by: Alan Ott Signed-off-by: David S. Miller --- net/ieee802154/6lowpan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 8f56b2b63bfb..ff41b4d60d30 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1388,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, entry->ldev = dev; + /* Set the lowpan harware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); INIT_LIST_HEAD(&entry->list); list_add_tail(&entry->list, &lowpan_devices); From 9757f1d2e0b4e7ff59e76c2fadbf819e18f3f884 Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:52:22 -0400 Subject: [PATCH 45/59] mrf24j40: Move INIT_COMPLETION() to before packet transmission This avoids a race condition where complete(tx_complete) could be called before tx_complete is initialized. Signed-off-by: Alan Ott Signed-off-by: David S. Miller --- drivers/net/ieee802154/mrf24j40.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 42e6deee6db5..66bb4ceaabe8 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -344,6 +344,8 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret) goto err; + INIT_COMPLETION(devrec->tx_complete); + /* Set TXNTRIG bit of TXNCON to send packet */ ret = read_short_reg(devrec, REG_TXNCON, &val); if (ret) @@ -354,8 +356,6 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) val |= 0x4; write_short_reg(devrec, REG_TXNCON, val); - INIT_COMPLETION(devrec->tx_complete); - /* Wait for the device to send the TX complete interrupt. */ ret = wait_for_completion_interruptible_timeout( &devrec->tx_complete, From 4a4e1da83cc1ff1b8a5171d9b16d4a6c2a6936db Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:52:23 -0400 Subject: [PATCH 46/59] mrf24j40: Use threaded IRQ handler Eliminate all the workqueue and interrupt enable/disable. Signed-off-by: Alan Ott Signed-off-by: David S. Miller --- drivers/net/ieee802154/mrf24j40.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 66bb4ceaabe8..c1bc68836346 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -82,7 +82,6 @@ struct mrf24j40 { struct mutex buffer_mutex; /* only used to protect buf */ struct completion tx_complete; - struct work_struct irqwork; u8 *buf; /* 3 bytes. Used for SPI single-register transfers. */ }; @@ -590,17 +589,6 @@ static struct ieee802154_ops mrf24j40_ops = { static irqreturn_t mrf24j40_isr(int irq, void *data) { struct mrf24j40 *devrec = data; - - disable_irq_nosync(irq); - - schedule_work(&devrec->irqwork); - - return IRQ_HANDLED; -} - -static void mrf24j40_isrwork(struct work_struct *work) -{ - struct mrf24j40 *devrec = container_of(work, struct mrf24j40, irqwork); u8 intstat; int ret; @@ -618,7 +606,7 @@ static void mrf24j40_isrwork(struct work_struct *work) mrf24j40_handle_rx(devrec); out: - enable_irq(devrec->spi->irq); + return IRQ_HANDLED; } static int mrf24j40_probe(struct spi_device *spi) @@ -642,7 +630,6 @@ static int mrf24j40_probe(struct spi_device *spi) mutex_init(&devrec->buffer_mutex); init_completion(&devrec->tx_complete); - INIT_WORK(&devrec->irqwork, mrf24j40_isrwork); devrec->spi = spi; spi_set_drvdata(spi, devrec); @@ -688,11 +675,12 @@ static int mrf24j40_probe(struct spi_device *spi) val &= ~0x3; /* Clear RX mode (normal) */ write_short_reg(devrec, REG_RXMCR, val); - ret = request_irq(spi->irq, - mrf24j40_isr, - IRQF_TRIGGER_FALLING, - dev_name(&spi->dev), - devrec); + ret = request_threaded_irq(spi->irq, + NULL, + mrf24j40_isr, + IRQF_TRIGGER_FALLING|IRQF_ONESHOT, + dev_name(&spi->dev), + devrec); if (ret) { dev_err(printdev(devrec), "Unable to get IRQ"); @@ -721,7 +709,6 @@ static int mrf24j40_remove(struct spi_device *spi) dev_dbg(printdev(devrec), "remove\n"); free_irq(spi->irq, devrec); - flush_work(&devrec->irqwork); /* TODO: Is this the right call? */ ieee802154_unregister_device(devrec->dev); ieee802154_free_device(devrec->dev); /* TODO: Will ieee802154_free_device() wait until ->xmit() is From 40afbb657352e92726c40cc4ddf3cf0be6800dba Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:52:24 -0400 Subject: [PATCH 47/59] mrf24j40: Use level-triggered interrupts The mrf24j40 generates level interrupts. There are rare cases where it appears that the interrupt line never gets de-asserted between interrupts, causing interrupts to be lost, and causing a hung device from the driver's perspective. Switching the driver to interpret these interrupts as level-triggered fixes this issue. Signed-off-by: Alan Ott Signed-off-by: David S. Miller --- drivers/net/ieee802154/mrf24j40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index c1bc68836346..0632d34905c7 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -678,7 +678,7 @@ static int mrf24j40_probe(struct spi_device *spi) ret = request_threaded_irq(spi->irq, NULL, mrf24j40_isr, - IRQF_TRIGGER_FALLING|IRQF_ONESHOT, + IRQF_TRIGGER_LOW|IRQF_ONESHOT, dev_name(&spi->dev), devrec); From 5c0c52c9102f90c9d5674fb6c5f4f23e3019dd9f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 6 Oct 2013 21:25:12 +0300 Subject: [PATCH 48/59] tun: don't look at current when non-blocking We play with a wait queue even if socket is non blocking. This is an obvious waste. Besides, it will prevent calling the non blocking variant when current is not valid. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 807815fc9968..7cb105c103fe 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1293,7 +1293,8 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, if (unlikely(!noblock)) add_wait_queue(&tfile->wq.wait, &wait); while (len) { - current->state = TASK_INTERRUPTIBLE; + if (unlikely(!noblock)) + current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) { @@ -1320,9 +1321,10 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, break; } - current->state = TASK_RUNNING; - if (unlikely(!noblock)) + if (unlikely(!noblock)) { + current->state = TASK_RUNNING; remove_wait_queue(&tfile->wq.wait, &wait); + } return ret; } From 8d8a51e26a6d415e1470759f2cf5f3ee3ee86196 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Oct 2013 15:44:26 -0400 Subject: [PATCH 49/59] l2tp: Fix build warning with ipv6 disabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/l2tp/l2tp_core.c: In function ‘l2tp_verify_udp_checksum’: net/l2tp/l2tp_core.c:499:22: warning: unused variable ‘tunnel’ [-Wunused-variable] Create a helper "l2tp_tunnel()" to facilitate this, and as a side effect get rid of a bunch of unnecessary void pointer casts. Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index aedaa2cd4237..b076e8309bc2 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -115,6 +115,11 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) +{ + return sk->sk_user_data; +} + static inline struct l2tp_net *l2tp_pernet(struct net *net) { BUG_ON(!net); @@ -496,7 +501,6 @@ out: static inline int l2tp_verify_udp_checksum(struct sock *sk, struct sk_buff *skb) { - struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; struct udphdr *uh = udp_hdr(skb); u16 ulen = ntohs(uh->len); __wsum psum; @@ -505,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { + if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1305,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); */ static void l2tp_tunnel_destruct(struct sock *sk) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); struct l2tp_net *pn; - tunnel = sk->sk_user_data; if (tunnel == NULL) goto end; @@ -1676,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } /* Check if this socket has already been prepped */ - tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + tunnel = l2tp_tunnel(sk); if (tunnel != NULL) { /* This socket has already been prepped */ err = -EBUSY; From 2c6221e4a5aab417cb18bef6f1130d1374240258 Mon Sep 17 00:00:00 2001 From: Nguyen Hong Ky Date: Mon, 7 Oct 2013 15:29:25 +0900 Subject: [PATCH 50/59] net: sh_eth: Fix RX packets errors on R8A7740 This patch will fix RX packets errors when receiving big size of data by set bit RNC = 1. RNC - Receive Enable Control 0: Upon completion of reception of one frame, the E-DMAC writes the receive status to the descriptor and clears the RR bit in EDRRR to 0. 1: Upon completion of reception of one frame, the E-DMAC writes (writes back) the receive status to the descriptor. In addition, the E-DMAC reads the next descriptor and prepares for reception of the next frame. In addition, for get more stable when receiving packets, I set maximum size for the transmit/receive FIFO and inserts padding in receive data. Signed-off-by: Nguyen Hong Ky Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5cd831ebfa83..c7e1b8f333ed 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -620,12 +620,16 @@ static struct sh_eth_cpu_data sh7734_data = { .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + .rmcr_value = 0x00000001, .apr = 1, .mpr = 1, .tpauser = 1, .bculr = 1, .hw_swap = 1, + .rpadir = 1, + .rpadir_value = 2 << 16, .no_trimd = 1, .no_ade = 1, .tsu = 1, From 70fbe0794393829d9acd686428d87c27b6f6984b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 7 Oct 2013 13:38:12 +0200 Subject: [PATCH 51/59] net/mlx4_en: Rename name of mlx4_en_rx_alloc members Add page prefix to page related members: @size and @offset into @page_size and @page_offset CC: Eric Dumazet Signed-off-by: Amir Vadai Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 40 +++++++++++--------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 4 +- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index dec455c8f627..066fc2747854 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -70,14 +70,15 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, put_page(page); return -ENOMEM; } - page_alloc->size = PAGE_SIZE << order; + page_alloc->page_size = PAGE_SIZE << order; page_alloc->page = page; page_alloc->dma = dma; - page_alloc->offset = frag_info->frag_align; + page_alloc->page_offset = frag_info->frag_align; /* Not doing get_page() for each frag is a big win * on asymetric workloads. */ - atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride); + atomic_set(&page->_count, + page_alloc->page_size / frag_info->frag_stride); return 0; } @@ -96,16 +97,19 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, for (i = 0; i < priv->num_frags; i++) { frag_info = &priv->frag_info[i]; page_alloc[i] = ring_alloc[i]; - page_alloc[i].offset += frag_info->frag_stride; - if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size) + page_alloc[i].page_offset += frag_info->frag_stride; + + if (page_alloc[i].page_offset + frag_info->frag_stride <= + ring_alloc[i].page_size) continue; + if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) goto out; } for (i = 0; i < priv->num_frags; i++) { frags[i] = ring_alloc[i]; - dma = ring_alloc[i].dma + ring_alloc[i].offset; + dma = ring_alloc[i].dma + ring_alloc[i].page_offset; ring_alloc[i] = page_alloc[i]; rx_desc->data[i].addr = cpu_to_be64(dma); } @@ -117,7 +121,7 @@ out: frag_info = &priv->frag_info[i]; if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].size, PCI_DMA_FROMDEVICE); + page_alloc[i].page_size, PCI_DMA_FROMDEVICE); page = page_alloc[i].page; atomic_set(&page->_count, 1); put_page(page); @@ -132,9 +136,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, { const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - if (frags[i].offset + frag_info->frag_stride > frags[i].size) - dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size, - PCI_DMA_FROMDEVICE); + if (frags[i].page_offset + frag_info->frag_stride > + frags[i].page_size) + dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, + PCI_DMA_FROMDEVICE); if (frags[i].page) put_page(frags[i].page); @@ -161,7 +166,7 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, PCI_DMA_FROMDEVICE); page = page_alloc->page; atomic_set(&page->_count, 1); put_page(page); @@ -184,10 +189,11 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->size, PCI_DMA_FROMDEVICE); - while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) { + page_alloc->page_size, PCI_DMA_FROMDEVICE); + while (page_alloc->page_offset + frag_info->frag_stride < + page_alloc->page_size) { put_page(page_alloc->page); - page_alloc->offset += frag_info->frag_stride; + page_alloc->page_offset += frag_info->frag_stride; } page_alloc->page = NULL; } @@ -478,7 +484,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, /* Save page reference in skb */ __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); - skb_frags_rx[nr].page_offset = frags[nr].offset; + skb_frags_rx[nr].page_offset = frags[nr].page_offset; skb->truesize += frag_info->frag_stride; frags[nr].page = NULL; } @@ -517,7 +523,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, /* Get pointer to first fragment so we could copy the headers into the * (linear part of the) skb */ - va = page_address(frags[0].page) + frags[0].offset; + va = page_address(frags[0].page) + frags[0].page_offset; if (length <= SMALL_PACKET_SIZE) { /* We are copying all relevant data to the skb - temporarily @@ -645,7 +651,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); ethh = (struct ethhdr *)(page_address(frags[0].page) + - frags[0].offset); + frags[0].page_offset); if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 5e0aa569306a..bf06e3610d27 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -237,8 +237,8 @@ struct mlx4_en_tx_desc { struct mlx4_en_rx_alloc { struct page *page; dma_addr_t dma; - u32 offset; - u32 size; + u32 page_offset; + u32 page_size; }; struct mlx4_en_tx_ring { From 021f1107ffdae7a82af6c53f4c52654062e365c6 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 7 Oct 2013 13:38:13 +0200 Subject: [PATCH 52/59] net/mlx4_en: Fix pages never dma unmapped on rx This patch fixes a bug introduced by commit 51151a16 (mlx4: allow order-0 memory allocations in RX path). dma_unmap_page never reached because condition to detect last fragment in page is wrong. offset+frag_stride can't be greater than size, need to make sure no additional frag will fit in page => compare offset + frag_stride + next_frag_size instead. next_frag_size is the same as the current one, since page is shared only with frags of the same size. CC: Eric Dumazet Signed-off-by: Amir Vadai Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 066fc2747854..afe2efa69c86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -135,9 +135,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, int i) { const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - if (frags[i].page_offset + frag_info->frag_stride > - frags[i].page_size) + + if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, PCI_DMA_FROMDEVICE); From dc62ccaccfb139d9b04bbc5a2688a4402adbfab3 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 7 Oct 2013 13:55:19 +0100 Subject: [PATCH 53/59] xen-netback: transition to CLOSED when removing a VIF If a guest is destroyed without transitioning its frontend to CLOSED, the domain becomes a zombie as netback was not grant unmapping the shared rings. When removing a VIF, transition the backend to CLOSED so the VIF is disconnected if necessary (which will unmap the shared rings etc). This fixes a regression introduced by 279f438e36c0a70b23b86d2090aeec50155034a9 (xen-netback: Don't destroy the netdev until the vif is shut down). Signed-off-by: David Vrabel Cc: Ian Campbell Cc: Wei Liu Cc: Paul Durrant Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index b45bce20ad76..1b08d8798372 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -39,11 +39,15 @@ static int connect_rings(struct backend_info *); static void connect(struct backend_info *); static void backend_create_xenvif(struct backend_info *be); static void unregister_hotplug_status_watch(struct backend_info *be); +static void set_backend_state(struct backend_info *be, + enum xenbus_state state); static int netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); + set_backend_state(be, XenbusStateClosed); + unregister_hotplug_status_watch(be); if (be->vif) { kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); From 0e719e3a53cc03bc1bafbb2cae1ddf99a0d73ab0 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Mon, 7 Oct 2013 18:50:05 +0100 Subject: [PATCH 54/59] ipv6: Fix the upper MTU limit in GRE tunnel Unlike ipv4, the struct member hlen holds the length of the GRE and ipv6 headers. This length is also counted in dev->hard_header_len. Perhaps, it's more clean to modify the hlen to count only the GRE header without ipv6 header as the variable name suggest, but the simple way to fix this without regression risk is simply modify the calculation of the limit in ip6gre_tunnel_change_mtu function. Verified in kernel version v3.11. Signed-off-by: Oussama Ghorbel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 7bb5446b9d73..1ef1fa2b22a6 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1173,9 +1173,8 @@ done: static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip6_tnl *tunnel = netdev_priv(dev); if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + new_mtu > 0xFFF8 - dev->hard_header_len) return -EINVAL; dev->mtu = new_mtu; return 0; From ede869cd0f45488195d56267d0c09ed511611d66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Oct 2013 12:50:18 -0700 Subject: [PATCH 55/59] pkt_sched: fq: fix typo for initial_quantum TCA_FQ_INITIAL_QUANTUM should set q->initial_quantum Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a2fef8b10b96..48501a2baf75 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -656,7 +656,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (tb[TCA_FQ_INITIAL_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); From c33a39c575068c2ea9bffb22fd6de2df19c74b89 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 7 Oct 2013 23:19:58 +0200 Subject: [PATCH 56/59] net: vlan: fix nlmsg size calculation in vlan_get_size() This patch fixes the calculation of the nlmsg size, by adding the missing nla_total_size(). Cc: Patrick McHardy Signed-off-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- net/8021q/vlan_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 309129732285..c7e634af8516 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ nla_total_size(2) + /* IFLA_VLAN_ID */ - sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ + nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } From bdfd6304c8e2439fc047fafccf05f145004260bd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 8 Oct 2013 11:19:19 +0800 Subject: [PATCH 57/59] moxa: fix the error handling in moxart_mac_probe() This patch fix the error handling in moxart_mac_probe(): - return -ENOMEM in some memory alloc fail cases - add missing free_netdev() in the error handling case Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index bd1a2d2bc2ae..ea54d95e5b9f 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -448,7 +448,8 @@ static int moxart_mac_probe(struct platform_device *pdev) irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { netdev_err(ndev, "irq_of_parse_and_map failed\n"); - return -EINVAL; + ret = -EINVAL; + goto irq_map_fail; } priv = netdev_priv(ndev); @@ -472,24 +473,32 @@ static int moxart_mac_probe(struct platform_device *pdev) priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM, &priv->tx_base, GFP_DMA | GFP_KERNEL); - if (priv->tx_desc_base == NULL) + if (priv->tx_desc_base == NULL) { + ret = -ENOMEM; goto init_fail; + } priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM, &priv->rx_base, GFP_DMA | GFP_KERNEL); - if (priv->rx_desc_base == NULL) + if (priv->rx_desc_base == NULL) { + ret = -ENOMEM; goto init_fail; + } priv->tx_buf_base = kmalloc(priv->tx_buf_size * TX_DESC_NUM, GFP_ATOMIC); - if (!priv->tx_buf_base) + if (!priv->tx_buf_base) { + ret = -ENOMEM; goto init_fail; + } priv->rx_buf_base = kmalloc(priv->rx_buf_size * RX_DESC_NUM, GFP_ATOMIC); - if (!priv->rx_buf_base) + if (!priv->rx_buf_base) { + ret = -ENOMEM; goto init_fail; + } platform_set_drvdata(pdev, ndev); @@ -522,7 +531,8 @@ static int moxart_mac_probe(struct platform_device *pdev) init_fail: netdev_err(ndev, "init failed\n"); moxart_mac_free_memory(ndev); - +irq_map_fail: + free_netdev(ndev); return ret; } From 2b1f18a4d6ae8057a93e736a34cdcca925279403 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 8 Oct 2013 11:32:17 +0800 Subject: [PATCH 58/59] qlcnic: add missing destroy_workqueue() on error path in qlcnic_probe() Add the missing destroy_workqueue() before return from qlcnic_probe() in the error handling case. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 21d00a0449a1..f07f2b0fefa0 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2257,7 +2257,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = qlcnic_alloc_adapter_resources(adapter); if (err) - goto err_out_free_netdev; + goto err_out_free_wq; adapter->dev_rst_time = jiffies; adapter->ahw->revision_id = pdev->revision; @@ -2396,6 +2396,9 @@ err_out_disable_msi: err_out_free_hw: qlcnic_free_adapter_resources(adapter); +err_out_free_wq: + destroy_workqueue(adapter->qlcnic_wq); + err_out_free_netdev: free_netdev(netdev); From 7eec4174ff29cd42f2acfae8112f51c228545d40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Oct 2013 15:16:00 -0700 Subject: [PATCH 59/59] pkt_sched: fq: fix non TCP flows pacing Steinar reported FQ pacing was not working for UDP flows. It looks like the initial sk->sk_pacing_rate value of 0 was a wrong choice. We should init it to ~0U (unlimited) Then, TCA_FQ_FLOW_DEFAULT_RATE should be removed because it makes no real sense. The default rate is really unlimited, and we need to avoid a zero divide. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 1 + net/sched/sch_fq.c | 20 +++++++++----------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 5b6beba494a3..0b39e7ae4383 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2319,6 +2319,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_ll_usec = sysctl_net_busy_read; #endif + sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 48501a2baf75..a9dfdda9ed1d 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -472,20 +472,16 @@ begin: if (f->credit > 0 || !q->rate_enable) goto out; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { - rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + rate = q->flow_max_rate; + if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + rate = min(skb->sk->sk_pacing_rate, rate); - rate = min(rate, q->flow_max_rate); - } else { - rate = q->flow_max_rate; - if (rate == ~0U) - goto out; - } - if (rate) { + if (rate != ~0U) { u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; - do_div(len, rate); + if (likely(rate)) + do_div(len, rate); /* Since socket rate can change later, * clamp the delay to 125 ms. * TODO: maybe segment the too big skb, as in commit @@ -735,12 +731,14 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; + /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore, + * do not bother giving its value + */ if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || - nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure;