From f5eeb5fa191fd7b634cbc4883ac58f3b2184dbc5 Mon Sep 17 00:00:00 2001 From: Adrien Schildknecht Date: Tue, 28 Jul 2015 10:30:16 +0200 Subject: [PATCH 01/17] mac80211: fix invalid read in minstrel_sort_best_tp_rates() At the last iteration of the loop, j may equal zero and thus tp_list[j - 1] causes an invalid read. Change the logic of the loop so that j - 1 is always >= 0. Cc: stable@vger.kernel.org Signed-off-by: Adrien Schildknecht Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 247552a7f6c2..3ece7d1034c8 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -92,14 +92,15 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) static inline void minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) { - int j = MAX_THR_RATES; - struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats; + int j; + struct minstrel_rate_stats *tmp_mrs; struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) > - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) { - j--; + for (j = MAX_THR_RATES; j > 0; --j) { tmp_mrs = &mi->r[tp_list[j - 1]].stats; + if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <= + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma)) + break; } if (j < MAX_THR_RATES - 1) From a516993f0ac1694673412eb2d16a091eafa77d2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 13 Aug 2015 05:54:07 +0200 Subject: [PATCH 02/17] net: fix wrong skb_get() usage / crash in IGMP/MLD parsing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent refactoring of the IGMP and MLD parsing code into ipv6_mc_check_mld() / ip_mc_check_igmp() introduced a potential crash / BUG() invocation for bridges: I wrongly assumed that skb_get() could be used as a simple reference counter for an skb which is not the case. skb_get() bears additional semantics, a user count. This leads to a BUG() invocation in pskb_expand_head() / kernel panic if pskb_may_pull() is called on an skb with a user count greater than one - unfortunately the refactoring did just that. Fixing this by removing the skb_get() call and changing the API: The caller of ipv6_mc_check_mld() / ip_mc_check_igmp() now needs to additionally check whether the returned skb_trimmed is a clone. Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Brenden Blanco Signed-off-by: Linus Lüssing Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 ++-- net/core/skbuff.c | 37 ++++++++++++++++++------------------- net/ipv4/igmp.c | 33 ++++++++++++++++++--------------- net/ipv6/mcast_snoop.c | 33 ++++++++++++++++++--------------- 4 files changed, 56 insertions(+), 51 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 0b39dcc65b94..1285eaf5dc22 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1591,7 +1591,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, break; } - if (skb_trimmed) + if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); return err; @@ -1636,7 +1636,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } - if (skb_trimmed) + if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); return err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b6a19ca0f99e..bf9a5d93c2d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4022,8 +4022,8 @@ EXPORT_SYMBOL(skb_checksum_setup); * Otherwise returns the provided skb. Returns NULL in error cases * (e.g. transport_len exceeds skb length or out-of-memory). * - * Caller needs to set the skb transport header and release the returned skb. - * Provided skb is consumed. + * Caller needs to set the skb transport header and free any returned skb if it + * differs from the provided skb. */ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int transport_len) @@ -4032,16 +4032,12 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int len = skb_transport_offset(skb) + transport_len; int ret; - if (skb->len < len) { - kfree_skb(skb); + if (skb->len < len) return NULL; - } else if (skb->len == len) { + else if (skb->len == len) return skb; - } skb_chk = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - if (!skb_chk) return NULL; @@ -4066,8 +4062,8 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, * If the skb has data beyond the given transport length, then a * trimmed & cloned skb is checked and returned. * - * Caller needs to set the skb transport header and release the returned skb. - * Provided skb is consumed. + * Caller needs to set the skb transport header and free any returned skb if it + * differs from the provided skb. */ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, @@ -4079,23 +4075,26 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, skb_chk = skb_checksum_maybe_trim(skb, transport_len); if (!skb_chk) - return NULL; + goto err; - if (!pskb_may_pull(skb_chk, offset)) { - kfree_skb(skb_chk); - return NULL; - } + if (!pskb_may_pull(skb_chk, offset)) + goto err; __skb_pull(skb_chk, offset); ret = skb_chkf(skb_chk); __skb_push(skb_chk, offset); - if (ret) { - kfree_skb(skb_chk); - return NULL; - } + if (ret) + goto err; return skb_chk; + +err: + if (skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return NULL; + } EXPORT_SYMBOL(skb_checksum_trimmed); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 651cdf648ec4..9fdfd9deac11 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1435,33 +1435,35 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) struct sk_buff *skb_chk; unsigned int transport_len; unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); - int ret; + int ret = -EINVAL; transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); - skb_get(skb); skb_chk = skb_checksum_trimmed(skb, transport_len, ip_mc_validate_checksum); if (!skb_chk) - return -EINVAL; + goto err; - if (!pskb_may_pull(skb_chk, len)) { - kfree_skb(skb_chk); - return -EINVAL; - } + if (!pskb_may_pull(skb_chk, len)) + goto err; ret = ip_mc_check_igmp_msg(skb_chk); - if (ret) { - kfree_skb(skb_chk); - return ret; - } + if (ret) + goto err; if (skb_trimmed) *skb_trimmed = skb_chk; - else + /* free now unneeded clone */ + else if (skb_chk != skb) kfree_skb(skb_chk); - return 0; + ret = 0; + +err: + if (ret && skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return ret; } /** @@ -1470,7 +1472,7 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) * * Checks whether an IPv4 packet is a valid IGMP packet. If so sets - * skb network and transport headers accordingly and returns zero. + * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard @@ -1485,7 +1487,8 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) * to leave the original skb and its full frame unchanged (which might be * desirable for layer 2 frame jugglers). * - * The caller needs to release a reference count from any returned skb_trimmed. + * Caller needs to set the skb network header and free any returned skb if it + * differs from the provided skb. */ int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) { diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index df8afe5ab31e..9405b04eecc6 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -143,34 +143,36 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff *skb_chk = NULL; unsigned int transport_len; unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); - int ret; + int ret = -EINVAL; transport_len = ntohs(ipv6_hdr(skb)->payload_len); transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); - skb_get(skb); skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) - return -EINVAL; + goto err; - if (!pskb_may_pull(skb_chk, len)) { - kfree_skb(skb_chk); - return -EINVAL; - } + if (!pskb_may_pull(skb_chk, len)) + goto err; ret = ipv6_mc_check_mld_msg(skb_chk); - if (ret) { - kfree_skb(skb_chk); - return ret; - } + if (ret) + goto err; if (skb_trimmed) *skb_trimmed = skb_chk; - else + /* free now unneeded clone */ + else if (skb_chk != skb) kfree_skb(skb_chk); - return 0; + ret = 0; + +err: + if (ret && skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return ret; } /** @@ -179,7 +181,7 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets - * skb network and transport headers accordingly and returns zero. + * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard @@ -194,7 +196,8 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, * to leave the original skb and its full frame unchanged (which might be * desirable for layer 2 frame jugglers). * - * The caller needs to release a reference count from any returned skb_trimmed. + * Caller needs to set the skb network header and free any returned skb if it + * differs from the provided skb. */ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) { From 25b97c016b26039982daaa2c11d83979f93b71ab Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 13 Aug 2015 20:49:01 +0100 Subject: [PATCH 03/17] ipv4: off-by-one in continuation handling in /proc/net/route When generating /proc/net/route we emit a header followed by a line for each route. When a short read is performed we will restart this process based on the open file descriptor. When calculating the start point we fail to take into account that the 0th entry is the header. This leads us to skip the first entry when doing a continuation read. This can be easily seen with the comparison below: while read l; do echo "$l"; done A cat /proc/net/route >B diff -bu A B | grep '^[+-]' On my example machine I have approximatly 10KB of route output. There we see the very first non-title element is lost in the while read case, and an entry around the 8K mark in the cat case: +wlan0 00000000 02021EAC 0003 0 0 400 00000000 0 0 0 -tun1 00C0AC0A 00000000 0001 0 0 950 00C0FFFF 0 0 0 Fix up the off-by-one when reaquiring position on continuation. Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf") BugLink: http://bugs.launchpad.net/bugs/1483440 Acked-by: Alexander Duyck Signed-off-by: Andy Whitcroft Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 37c4bb89a708..b0c6258ffb79 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2465,7 +2465,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, key = l->key + 1; iter->pos++; - if (pos-- <= 0) + if (--pos <= 0) break; l = NULL; From 2a4eebf0c485d8e90bdd2e33e75c4b3b1e1673ac Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Thu, 13 Aug 2015 16:50:37 +0300 Subject: [PATCH 04/17] gianfar: Restore link state settings after MAC reset There are some MAC registers that need to be kept in sync with the link state parameters, see adjust_link(). However, after a MAC soft reset default values for these registers are assumed. In some cases (excepting if down/ if up for example) adjust_link() does not see that these values were reset to default because the priv->old* link parameters were left unchanged. So, reset the priv->old* link params as well during a MAC reset to let adjust_link() restore the MAC link settings to the actual link state values. Fixes following case, for example: Setting link to 100M, changing MTU (implies MAC reset), link state remains unchanged to 100M but MAC registers were reset to default (1G) breaking the connectivity w/ the PHY. Closing and re-opening the interface would restore the MAC link parameters to the correct values. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 2b7610f341b0..10b3bbbbac8e 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2102,6 +2102,11 @@ int startup_gfar(struct net_device *ndev) /* Start Rx/Tx DMA and enable the interrupts */ gfar_start(priv); + /* force link state update after mac reset */ + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + phy_start(priv->phydev); enable_napi(priv); From 83fccfc3940c4a2db90fd7e7079f5b465cd8c6af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Aug 2015 15:44:51 -0700 Subject: [PATCH 05/17] inet: fix potential deadlock in reqsk_queue_unlink() When replacing del_timer() with del_timer_sync(), I introduced a deadlock condition : reqsk_queue_unlink() is called from inet_csk_reqsk_queue_drop() inet_csk_reqsk_queue_drop() can be called from many contexts, one being the timer handler itself (reqsk_timer_handler()). In this case, del_timer_sync() loops forever. Simple fix is to test if timer is pending. Fixes: 2235f2ac75fd ("inet: fix races with reqsk timers") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 05e3145f7dc3..134957159c27 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -593,7 +593,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue, } spin_unlock(&queue->syn_wait_lock); - if (del_timer_sync(&req->rsk_timer)) + if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; } From 5d37852bf7d48e5afb5238a658cc167e7b78b381 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 13 Aug 2015 14:21:34 -0700 Subject: [PATCH 06/17] Revert "net: limit tcp/udp rmem/wmem to SOCK_{RCV,SND}BUF_MIN" Commit 8133534c760d4083 ("net: limit tcp/udp rmem/wmem to SOCK_{RCV,SND}BUF_MIN") modified four sysctls to enforce that the values written to them are not less than SOCK_MIN_{RCV,SND}BUF. That change causes 4096 to no longer be accepted as a valid value for 'min' in tcp_wmem and udp_wmem_min. 4096 has been the default for both of those sysctls for a long time, and unfortunately seems to be an extremely popular setting. This change breaks a large number of sysctl configurations at Facebook. That commit referred to b1cb59cf2efe7971 ("net: sysctl_net_core: check SNDBUF and RCVBUF for min length"), which choose to use the SOCK_MIN constants as the lower limits to avoid nasty bugs. But AFAICS, a limit of SOCK_MIN_SNDBUF isn't necessary to do that: the BUG_ON cited in the commit message seems to have happened because unix_stream_sendmsg() expects a minimum of a full page (ie SK_MEM_QUANTUM) and the math broke, not because it had less than SOCK_MIN_SNDBUF allocated. This particular issue doesn't seem to affect TCP however: using a setting of "1 1 1" for tcp_{r,w}mem works, although it's obviously suboptimal. SK_MEM_QUANTUM would be a nice minimum, but it's 64K on some archs, so there would still be breakage. Since a value of one doesn't seem to cause any problems, we can drop the minimum 8133534c added to fix this. This reverts commit 8133534c760d4083f79d2cde42c636ccc0b2792e. Fixes: 8133534c760d4083 ("net: limit tcp/udp rmem/wmem to SOCK_MIN...") Cc: Eric Dumazet Cc: Sorin Dumitru Signed-off-by: Calvin Owens Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 433231ccfb17..0330ab2e2b63 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -41,8 +41,6 @@ static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; -static int min_sndbuf = SOCK_MIN_SNDBUF; -static int min_rcvbuf = SOCK_MIN_RCVBUF; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) @@ -530,7 +528,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, + .extra1 = &one, }, { .procname = "tcp_notsent_lowat", @@ -545,7 +543,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, + .extra1 = &one, }, { .procname = "tcp_app_win", @@ -758,7 +756,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, + .extra1 = &one }, { .procname = "udp_wmem_min", @@ -766,7 +764,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, + .extra1 = &one }, { } }; From 11e122cbe90ea5079622fb57bdf2dffe8cf68e57 Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Fri, 14 Aug 2015 12:23:40 +0800 Subject: [PATCH 07/17] net: phy: fix PHY_RUNNING in phy_state_machine Currently, if phy state is PHY_RUNNING, we always register a CHANGE when phy works in polling or interrupt ignored, this will make the adjust_link being called even the phy link did Not changed. checking the phy link to make sure the link did changed before we register a CHANGE, if link did not changed, we do nothing. Signed-off-by: Shaohui Xie Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b2197b506acb..1e1fbb049ec6 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -811,6 +811,7 @@ void phy_state_machine(struct work_struct *work) bool needs_aneg = false, do_suspend = false; enum phy_state old_state; int err = 0; + int old_link; mutex_lock(&phydev->lock); @@ -896,11 +897,18 @@ void phy_state_machine(struct work_struct *work) phydev->adjust_link(phydev->attached_dev); break; case PHY_RUNNING: - /* Only register a CHANGE if we are - * polling or ignoring interrupts + /* Only register a CHANGE if we are polling or ignoring + * interrupts and link changed since latest checking. */ - if (!phy_interrupt_is_valid(phydev)) - phydev->state = PHY_CHANGELINK; + if (!phy_interrupt_is_valid(phydev)) { + old_link = phydev->link; + err = phy_read_status(phydev); + if (err) + break; + + if (old_link != phydev->link) + phydev->state = PHY_CHANGELINK; + } break; case PHY_CHANGELINK: err = phy_read_status(phydev); From 8cb775bc0a34dc596837e7da03fd22c747be618b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 14 Aug 2015 10:42:56 +0200 Subject: [PATCH 08/17] ppp: fix device unregistration upon netns deletion PPP devices may get automatically unregistered when their network namespace is getting removed. This happens if the ppp control plane daemon (e.g. pppd) exits while it is the last user of this namespace. This leads to several races: * ppp_exit_net() may destroy the per namespace idr (pn->units_idr) before all file descriptors were released. Successive ppp_release() calls may then cleanup PPP devices with ppp_shutdown_interface() and try to use the already destroyed idr. * Automatic device unregistration may also happen before the ppp_release() call for that device gets executed. Once called on the file owning the device, ppp_release() will then clean it up and try to unregister it a second time. To fix these issues, operations defined in ppp_shutdown_interface() are moved to the PPP device's ndo_uninit() callback. This allows PPP devices to be properly cleaned up by unregister_netdev() and friends. So checking for ppp->owner is now an accurate test to decide if a PPP device should be unregistered. Setting ppp->owner is done in ppp_create_interface(), before device registration, in order to avoid unprotected modification of this field. Finally, ppp_exit_net() now starts by unregistering all remaining PPP devices to ensure that none will get unregistered after the call to idr_destroy(). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 78 +++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 9d15566521a7..fa8f5046afe9 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -269,9 +269,9 @@ static void ppp_ccp_peek(struct ppp *ppp, struct sk_buff *skb, int inbound); static void ppp_ccp_closed(struct ppp *ppp); static struct compressor *find_compressor(int type); static void ppp_get_stats(struct ppp *ppp, struct ppp_stats *st); -static struct ppp *ppp_create_interface(struct net *net, int unit, int *retp); +static struct ppp *ppp_create_interface(struct net *net, int unit, + struct file *file, int *retp); static void init_ppp_file(struct ppp_file *pf, int kind); -static void ppp_shutdown_interface(struct ppp *ppp); static void ppp_destroy_interface(struct ppp *ppp); static struct ppp *ppp_find_unit(struct ppp_net *pn, int unit); static struct channel *ppp_find_channel(struct ppp_net *pn, int unit); @@ -392,8 +392,10 @@ static int ppp_release(struct inode *unused, struct file *file) file->private_data = NULL; if (pf->kind == INTERFACE) { ppp = PF_TO_PPP(pf); + rtnl_lock(); if (file == ppp->owner) - ppp_shutdown_interface(ppp); + unregister_netdevice(ppp->dev); + rtnl_unlock(); } if (atomic_dec_and_test(&pf->refcnt)) { switch (pf->kind) { @@ -593,8 +595,10 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&ppp_mutex); if (pf->kind == INTERFACE) { ppp = PF_TO_PPP(pf); + rtnl_lock(); if (file == ppp->owner) - ppp_shutdown_interface(ppp); + unregister_netdevice(ppp->dev); + rtnl_unlock(); } if (atomic_long_read(&file->f_count) < 2) { ppp_release(NULL, file); @@ -838,11 +842,10 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, /* Create a new ppp unit */ if (get_user(unit, p)) break; - ppp = ppp_create_interface(net, unit, &err); + ppp = ppp_create_interface(net, unit, file, &err); if (!ppp) break; file->private_data = &ppp->file; - ppp->owner = file; err = -EFAULT; if (put_user(ppp->file.index, p)) break; @@ -916,6 +919,16 @@ static __net_init int ppp_init_net(struct net *net) static __net_exit void ppp_exit_net(struct net *net) { struct ppp_net *pn = net_generic(net, ppp_net_id); + struct ppp *ppp; + LIST_HEAD(list); + int id; + + rtnl_lock(); + idr_for_each_entry(&pn->units_idr, ppp, id) + unregister_netdevice_queue(ppp->dev, &list); + + unregister_netdevice_many(&list); + rtnl_unlock(); idr_destroy(&pn->units_idr); } @@ -1088,8 +1101,28 @@ static int ppp_dev_init(struct net_device *dev) return 0; } +static void ppp_dev_uninit(struct net_device *dev) +{ + struct ppp *ppp = netdev_priv(dev); + struct ppp_net *pn = ppp_pernet(ppp->ppp_net); + + ppp_lock(ppp); + ppp->closing = 1; + ppp_unlock(ppp); + + mutex_lock(&pn->all_ppp_mutex); + unit_put(&pn->units_idr, ppp->file.index); + mutex_unlock(&pn->all_ppp_mutex); + + ppp->owner = NULL; + + ppp->file.dead = 1; + wake_up_interruptible(&ppp->file.rwait); +} + static const struct net_device_ops ppp_netdev_ops = { .ndo_init = ppp_dev_init, + .ndo_uninit = ppp_dev_uninit, .ndo_start_xmit = ppp_start_xmit, .ndo_do_ioctl = ppp_net_ioctl, .ndo_get_stats64 = ppp_get_stats64, @@ -2667,8 +2700,8 @@ ppp_get_stats(struct ppp *ppp, struct ppp_stats *st) * or if there is already a unit with the requested number. * unit == -1 means allocate a new number. */ -static struct ppp * -ppp_create_interface(struct net *net, int unit, int *retp) +static struct ppp *ppp_create_interface(struct net *net, int unit, + struct file *file, int *retp) { struct ppp *ppp; struct ppp_net *pn; @@ -2688,6 +2721,7 @@ ppp_create_interface(struct net *net, int unit, int *retp) ppp->mru = PPP_MRU; init_ppp_file(&ppp->file, INTERFACE); ppp->file.hdrlen = PPP_HDRLEN - 2; /* don't count proto bytes */ + ppp->owner = file; for (i = 0; i < NUM_NP; ++i) ppp->npmode[i] = NPMODE_PASS; INIT_LIST_HEAD(&ppp->channels); @@ -2775,34 +2809,6 @@ init_ppp_file(struct ppp_file *pf, int kind) init_waitqueue_head(&pf->rwait); } -/* - * Take down a ppp interface unit - called when the owning file - * (the one that created the unit) is closed or detached. - */ -static void ppp_shutdown_interface(struct ppp *ppp) -{ - struct ppp_net *pn; - - pn = ppp_pernet(ppp->ppp_net); - mutex_lock(&pn->all_ppp_mutex); - - /* This will call dev_close() for us. */ - ppp_lock(ppp); - if (!ppp->closing) { - ppp->closing = 1; - ppp_unlock(ppp); - unregister_netdev(ppp->dev); - unit_put(&pn->units_idr, ppp->file.index); - } else - ppp_unlock(ppp); - - ppp->file.dead = 1; - ppp->owner = NULL; - wake_up_interruptible(&ppp->file.rwait); - - mutex_unlock(&pn->all_ppp_mutex); -} - /* * Free the memory used by a ppp unit. This is only called once * there are no channels connected to the unit and no file structs From 2902bc66fa7a6d959e033e5358fd836e2839b5db Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Aug 2015 11:54:59 +0300 Subject: [PATCH 09/17] net: ethernet: micrel: fix an error code The dma_mapping_error() function returns true or false. We should return -ENOMEM if it there is a dma mapping error. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8842.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index f78909a00f15..09d2e16fd6b0 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -952,9 +952,8 @@ static int ks8842_alloc_dma_bufs(struct net_device *netdev) sg_dma_address(&tx_ctl->sg) = dma_map_single(adapter->dev, tx_ctl->buf, DMA_BUFFER_SIZE, DMA_TO_DEVICE); - err = dma_mapping_error(adapter->dev, - sg_dma_address(&tx_ctl->sg)); - if (err) { + if (dma_mapping_error(adapter->dev, sg_dma_address(&tx_ctl->sg))) { + err = -ENOMEM; sg_dma_address(&tx_ctl->sg) = 0; goto err; } From 776829de90c5972895db398993ddfa9417ff8b01 Mon Sep 17 00:00:00 2001 From: Igor Plyatov Date: Fri, 14 Aug 2015 20:11:02 +0300 Subject: [PATCH 10/17] net: phy: workaround for buggy cable detection by LAN8700 after cable plugging * Due to HW bug, LAN8700 sometimes does not detect presence of energy in the Ethernet cable in Energy Detect Power-Down mode (e.g while EDPWRDOWN bit is set, the ENERGYON bit does not asserted sometimes). This is a common bug of LAN87xx family of PHY chips. * The lan87xx_read_status() was improved to acquire ENERGYON bit. Its previous algorythm still not reliable on 100 % and sometimes skip cable plugging. Signed-off-by: Igor Plyatov Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index c0f6479e19d4..d64f01623bd1 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -91,19 +91,18 @@ static int lan911x_config_init(struct phy_device *phydev) } /* - * The LAN8710/LAN8720 requires a minimum of 2 link pulses within 64ms of each - * other in order to set the ENERGYON bit and exit EDPD mode. If a link partner - * does send the pulses within this interval, the PHY will remained powered - * down. - * - * This workaround will manually toggle the PHY on/off upon calls to read_status - * in order to generate link test pulses if the link is down. If a link partner - * is present, it will respond to the pulses, which will cause the ENERGYON bit - * to be set and will cause the EDPD mode to be exited. + * The LAN87xx suffers from rare absence of the ENERGYON-bit when Ethernet cable + * plugs in while LAN87xx is in Energy Detect Power-Down mode. This leads to + * unstable detection of plugging in Ethernet cable. + * This workaround disables Energy Detect Power-Down mode and waiting for + * response on link pulses to detect presence of plugged Ethernet cable. + * The Energy Detect Power-Down mode is enabled again in the end of procedure to + * save approximately 220 mW of power if cable is unplugged. */ static int lan87xx_read_status(struct phy_device *phydev) { int err = genphy_read_status(phydev); + int i; if (!phydev->link) { /* Disable EDPD to wake up PHY */ @@ -116,8 +115,16 @@ static int lan87xx_read_status(struct phy_device *phydev) if (rc < 0) return rc; - /* Sleep 64 ms to allow ~5 link test pulses to be sent */ - msleep(64); + /* Wait max 640 ms to detect energy */ + for (i = 0; i < 64; i++) { + /* Sleep to allow link test pulses to be sent */ + msleep(10); + rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + if (rc & MII_LAN83C185_ENERGYON) + break; + }; /* Re-enable EDPD */ rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); @@ -191,7 +198,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, + .read_status = lan87xx_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, From ad706862890171e02df1d7391b05599fb676ec18 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 14 Aug 2015 11:05:52 -0700 Subject: [PATCH 11/17] ipv6: Remove un-used argument from ip6_dst_alloc() After 4b32b5ad31a6 ("ipv6: Stop rt6_info from using inet_peer's metrics"), ip6_dst_alloc() does not need the 'table' argument. This patch cleans it up. Signed-off-by: Martin KaFai Lau CC: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9de4d2bcd916..c95c3197c186 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -318,8 +318,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags, - struct fib6_table *table) + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -336,10 +335,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, static struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags, - struct fib6_table *table) + int flags) { - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); if (rt) { rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); @@ -950,8 +948,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, - 0, ort->rt6i_table); + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0); if (!rt) return NULL; @@ -983,8 +980,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) struct rt6_info *pcpu_rt; pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), - rt->dst.dev, rt->dst.flags, - rt->rt6i_table); + rt->dst.dev, rt->dst.flags); if (!pcpu_rt) return NULL; @@ -1555,7 +1551,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(net, dev, 0, NULL); + rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1742,7 +1738,8 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, + (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); if (!rt) { err = -ENOMEM; @@ -2399,7 +2396,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT, NULL); + DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); From a73e4195636c17f310b8530643a576f42b82385f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 14 Aug 2015 11:05:53 -0700 Subject: [PATCH 12/17] ipv6: Add rt6_make_pcpu_route() It is a prep work for fixing a potential deadlock when creating a pcpu rt. The current rt6_get_pcpu_route() will also create a pcpu rt if one does not exist. This patch moves the pcpu rt creation logic into another function, rt6_make_pcpu_route(). Signed-off-by: Martin KaFai Lau CC: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c95c3197c186..0a82653efc88 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -993,13 +993,21 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) /* It should be called with read_lock_bh(&tb6_lock) acquired */ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) { - struct rt6_info *pcpu_rt, *prev, **p; + struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt) - goto done; + if (pcpu_rt) { + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + } + return pcpu_rt; +} + +static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { @@ -1009,6 +1017,7 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) goto done; } + p = this_cpu_ptr(rt->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); if (prev) { /* If someone did it before us, return prev instead */ @@ -1093,8 +1102,11 @@ redo_rt6_select: rt->dst.lastuse = jiffies; rt->dst.__use++; pcpu_rt = rt6_get_pcpu_route(rt); - read_unlock_bh(&table->tb6_lock); + if (!pcpu_rt) + pcpu_rt = rt6_make_pcpu_route(rt); + + read_unlock_bh(&table->tb6_lock); return pcpu_rt; } } From 9c7370a166b4e157137bfbfe2ad296d57147547c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 14 Aug 2015 11:05:54 -0700 Subject: [PATCH 13/17] ipv6: Fix a potential deadlock when creating pcpu rt rt6_make_pcpu_route() is called under read_lock(&table->tb6_lock). rt6_make_pcpu_route() calls ip6_rt_pcpu_alloc(rt) which then calls dst_alloc(). dst_alloc() _may_ call ip6_dst_gc() which takes the write_lock(&tabl->tb6_lock). A visualized version: read_lock(&table->tb6_lock); rt6_make_pcpu_route(); => ip6_rt_pcpu_alloc(); => dst_alloc(); => ip6_dst_gc(); => write_lock(&table->tb6_lock); /* oops */ The fix is to do a read_unlock first before calling ip6_rt_pcpu_alloc(). A reported stack: [141625.537638] INFO: rcu_sched self-detected stall on CPU { 27} (t=60000 jiffies g=4159086 c=4159085 q=2139) [141625.547469] Task dump for CPU 27: [141625.550881] mtr R running task 0 22121 22081 0x00000008 [141625.558069] 0000000000000000 ffff88103f363d98 ffffffff8106e488 000000000000001b [141625.565641] ffffffff81684900 ffff88103f363db8 ffffffff810702b0 0000000008000000 [141625.573220] ffffffff81684900 ffff88103f363de8 ffffffff8108df9f ffff88103f375a00 [141625.580803] Call Trace: [141625.583345] [] sched_show_task+0xc1/0xc6 [141625.589650] [] dump_cpu_task+0x35/0x39 [141625.595144] [] rcu_dump_cpu_stacks+0x6a/0x8c [141625.601320] [] rcu_check_callbacks+0x1f6/0x5d4 [141625.607669] [] update_process_times+0x2a/0x4f [141625.613925] [] tick_sched_handle+0x32/0x3e [141625.619923] [] tick_sched_timer+0x35/0x5c [141625.625830] [] __hrtimer_run_queues+0x8f/0x18d [141625.632171] [] hrtimer_interrupt+0xa0/0x166 [141625.638258] [] local_apic_timer_interrupt+0x4e/0x52 [141625.645036] [] smp_apic_timer_interrupt+0x39/0x4a [141625.651643] [] apic_timer_interrupt+0x68/0x70 [141625.657895] [] ? dst_destroy+0x7c/0xb5 [141625.664188] [] ? fib6_flush_trees+0x20/0x20 [141625.670272] [] ? queue_write_lock_slowpath+0x60/0x6f [141625.677140] [] _raw_write_lock_bh+0x23/0x25 [141625.683218] [] __fib6_clean_all+0x40/0x82 [141625.689124] [] ? fib6_flush_trees+0x20/0x20 [141625.695207] [] fib6_clean_all+0xe/0x10 [141625.700854] [] fib6_run_gc+0x79/0xc8 [141625.706329] [] ip6_dst_gc+0x85/0xf9 [141625.711718] [] dst_alloc+0x55/0x159 [141625.717105] [] __ip6_dst_alloc.isra.32+0x19/0x63 [141625.723620] [] ip6_pol_route+0x36a/0x3e8 [141625.729441] [] ip6_pol_route_output+0x11/0x13 [141625.735700] [] fib6_rule_action+0xa7/0x1bf [141625.741698] [] ? ip6_pol_route_input+0x17/0x17 [141625.748043] [] fib_rules_lookup+0xb5/0x12a [141625.754050] [] ? poll_select_copy_remaining+0xf9/0xf9 [141625.761002] [] fib6_rule_lookup+0x37/0x5c [141625.766914] [] ? ip6_pol_route_input+0x17/0x17 [141625.773260] [] ip6_route_output+0x7a/0x82 [141625.779177] [] ip6_dst_lookup_tail+0x53/0x112 [141625.785437] [] ip6_dst_lookup_flow+0x2a/0x6b [141625.791604] [] rawv6_sendmsg+0x407/0x9b6 [141625.797423] [] ? do_ipv6_setsockopt.isra.8+0xd87/0xde2 [141625.804464] [] inet_sendmsg+0x57/0x8e [141625.810028] [] sock_sendmsg+0x2e/0x3c [141625.815588] [] SyS_sendto+0xfe/0x143 [141625.821063] [] ? rawv6_setsockopt+0x5e/0x67 [141625.827146] [] ? sock_common_setsockopt+0xf/0x11 [141625.833660] [] ? SyS_setsockopt+0x81/0xa2 [141625.839565] [] entry_SYSCALL_64_fastpath+0x12/0x6a Fixes: d52d3997f843 ("pv6: Create percpu rt6_info") Signed-off-by: Martin KaFai Lau CC: Hannes Frederic Sowa Reported-by: Steinar H. Gunderson Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ net/ipv6/route.c | 44 +++++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 55d19861ab20..548c6237b1e7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -172,6 +172,8 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) *ppcpu_rt = NULL; } } + + non_pcpu_rt->rt6i_pcpu = NULL; } static void rt6_release(struct rt6_info *rt) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0a82653efc88..d15586490cec 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1007,27 +1007,39 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) { + struct fib6_table *table = rt->rt6i_table; struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { struct net *net = dev_net(rt->dst.dev); - pcpu_rt = net->ipv6.ip6_null_entry; - goto done; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return net->ipv6.ip6_null_entry; } - p = this_cpu_ptr(rt->rt6i_pcpu); - prev = cmpxchg(p, NULL, pcpu_rt); - if (prev) { - /* If someone did it before us, return prev instead */ + read_lock_bh(&table->tb6_lock); + if (rt->rt6i_pcpu) { + p = this_cpu_ptr(rt->rt6i_pcpu); + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + } else { + /* rt has been removed from the fib6 tree + * before we have a chance to acquire the read_lock. + * In this case, don't brother to create a pcpu rt + * since rt is going away anyway. The next + * dst_check() will trigger a re-lookup. + */ dst_destroy(&pcpu_rt->dst); - pcpu_rt = prev; + pcpu_rt = rt; } - -done: dst_hold(&pcpu_rt->dst); rt6_dst_from_metrics_check(pcpu_rt); + read_unlock_bh(&table->tb6_lock); return pcpu_rt; } @@ -1103,11 +1115,21 @@ redo_rt6_select: rt->dst.__use++; pcpu_rt = rt6_get_pcpu_route(rt); - if (!pcpu_rt) + if (pcpu_rt) { + read_unlock_bh(&table->tb6_lock); + } else { + /* We have to do the read_unlock first + * because rt6_make_pcpu_route() may trigger + * ip6_dst_gc() which will take the write_lock. + */ + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); pcpu_rt = rt6_make_pcpu_route(rt); + dst_release(&rt->dst); + } - read_unlock_bh(&table->tb6_lock); return pcpu_rt; + } } From af19e68683ba9c29e778d0f8be5be61a0ebb4166 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 14 Aug 2015 22:30:01 +0200 Subject: [PATCH 14/17] be2net: avoid vxlan offloading on multichannel configs VxLAN offloading is not functional if the NIC is running in multichannel mode (UMC, FLEX-10, VNIC...). Enabling this additionally kills whole connectivity through the NIC and the device needs to be down and up to restore it. The firmware should take care about it and does not allow the conversion of interface to tunnel type (be_cmd_manage_iface) or should support VxLAN offloading if multichannel config is enabled. I have tested this on the latest available firmware (10.6.144.21). Result: [root@sm-04 ~]# ip link set enp5s0f0 up[root@sm-04 ~]# ip addr add 172.30.10.50/24 dev enp5s0f0 [root@sm-04 ~]# ping -c 3 172.30.10.254PING 172.30.10.254 (172.30.10.254) 56(84) bytes of data. 64 bytes from 172.30.10.254: icmp_seq=1 ttl=64 time=0.317 ms 64 bytes from 172.30.10.254: icmp_seq=2 ttl=64 time=0.187 ms 64 bytes from 172.30.10.254: icmp_seq=3 ttl=64 time=0.188 ms --- 172.30.10.254 ping statistics --- 3 packets transmitted, 3 received, 0% packet loss, time 2000ms rtt min/avg/max/mdev = 0.187/0.230/0.317/0.063 ms [root@sm-04 ~]# ip link add link enp5s0f0 vxlan10 type vxlan id 10 remote 172.30.10.60 dstport 4789 [root@sm-04 ~]# ip link set vxlan10 up [ 7900.442811] be2net 0000:05:00.0: Enabled VxLAN offloads for UDP port 4789 [ 7900.455722] be2net 0000:05:00.1: Enabled VxLAN offloads for UDP port 4789 [ 7900.468635] be2net 0000:05:00.2: Enabled VxLAN offloads for UDP port 4789 [ 7900.481553] be2net 0000:05:00.3: Enabled VxLAN offloads for UDP port 4789 [root@sm-04 ~]# ping -c 3 172.30.10.254 PING 172.30.10.254 (172.30.10.254) 56(84) bytes of data. --- 172.30.10.254 ping statistics --- 3 packets transmitted, 0 received, 100% packet loss, time 1999ms [root@sm-04 ~]# ip link set vxlan10 down [ 7959.434093] be2net 0000:05:00.0: Disabled VxLAN offloads for UDP port 4789 [ 7959.444792] be2net 0000:05:00.1: Disabled VxLAN offloads for UDP port 4789 [ 7959.455592] be2net 0000:05:00.2: Disabled VxLAN offloads for UDP port 4789 [ 7959.466416] be2net 0000:05:00.3: Disabled VxLAN offloads for UDP port 4789 [root@sm-04 ~]# ip link del vxlan10 [root@sm-04 ~]# ping -c 3 172.30.10.254 PING 172.30.10.254 (172.30.10.254) 56(84) bytes of data. --- 172.30.10.254 ping statistics --- 3 packets transmitted, 0 received, 100% packet loss, time 1999ms [root@sm-04 ~]# ip link set enp5s0f0 down [root@sm-04 ~]# ip link set enp5s0f0 up [ 8071.019003] be2net 0000:05:00.0 enp5s0f0: Link is Up [root@sm-04 ~]# ping -c 3 172.30.10.254 PING 172.30.10.254 (172.30.10.254) 56(84) bytes of data. 64 bytes from 172.30.10.254: icmp_seq=1 ttl=64 time=0.318 ms 64 bytes from 172.30.10.254: icmp_seq=2 ttl=64 time=0.196 ms 64 bytes from 172.30.10.254: icmp_seq=3 ttl=64 time=0.194 ms --- 172.30.10.254 ping statistics --- 3 packets transmitted, 3 received, 0% packet loss, time 2000ms rtt min/avg/max/mdev = 0.194/0.236/0.318/0.057 ms Cc: Sathya Perla Cc: Ajit Khaparde Cc: Padmanabh Ratnakar Cc: Sriharsha Basavapatna Signed-off-by: Ivan Vecera Acked-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c28e3bfdccd7..6ca693b03f33 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5174,7 +5174,7 @@ static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, struct device *dev = &adapter->pdev->dev; int status; - if (lancer_chip(adapter) || BEx_chip(adapter)) + if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) return; if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) { @@ -5221,7 +5221,7 @@ static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family, { struct be_adapter *adapter = netdev_priv(netdev); - if (lancer_chip(adapter) || BEx_chip(adapter)) + if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) return; if (adapter->vxlan_port != port) From a8079092c1bbf9aec3756b35256c7816b8845af7 Mon Sep 17 00:00:00 2001 From: David Ward Date: Sat, 15 Aug 2015 20:12:30 -0400 Subject: [PATCH 15/17] net: qmi_wwan: add HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module This is an HP-branded Sierra Wireless EM7355: https://bugzilla.redhat.com/show_bug.cgi?id=1223646#c2 Signed-off-by: David Ward Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9d43460ce3c7..64a60afbe50c 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -785,6 +785,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a8, 8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ + {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x581d, 4)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ /* 4. Gobi 1000 devices */ From ff94c742dfeea3110f1e1d27399d728f8494d29e Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 18 Aug 2015 06:31:42 +0800 Subject: [PATCH 16/17] net: phy: fix semicolon.cocci warnings drivers/net/phy/smsc.c:127:3-4: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci CC: Igor Plyatov Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- drivers/net/phy/smsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index d64f01623bd1..70b08958763a 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -124,7 +124,7 @@ static int lan87xx_read_status(struct phy_device *phydev) return rc; if (rc & MII_LAN83C185_ENERGYON) break; - }; + } /* Re-enable EDPD */ rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); From fd7dec25a18f495e50d2040398fd263836ff3b28 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 18 Aug 2015 13:37:01 +0200 Subject: [PATCH 17/17] batman-adv: Fix memory leak on tt add with invalid vlan The object tt_local is allocated with kmalloc and not initialized when the function batadv_tt_local_add checks for the vlan. But this function can only cleanup the object when the (not yet initialized) reference counter of the object is 1. This is unlikely and thus the object would leak when the vlan could not be found. Instead the uninitialized object tt_local has to be freed manually and the pointer has to set to NULL to avoid calling the function which would try to decrement the reference counter of the not existing object. CID: 1316518 Fixes: 354136bcc3c4 ("batman-adv: fix kernel crash due to missing NULL checks") Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/translation-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5e953297d3b2..5809b39c1922 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -595,8 +595,11 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", - addr, BATADV_PRINT_VID(vid))) + addr, BATADV_PRINT_VID(vid))) { + kfree(tt_local); + tt_local = NULL; goto out; + } batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",