From a7f1884554b81bd68cd435d72f09a3527629ac43 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 May 2016 14:43:54 +0200 Subject: [PATCH 01/85] netfilter: nfnetlink_queue: fix timestamp attribute Since 4.4 we erronously use timestamp of the netlink skb (which is zero). Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1066 Fixes: b28b1e826f818c30ea7 ("netfilter: nfnetlink_queue: use y2038 safe timestamp") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cb5b630a645b..e34256a0e7e9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -499,7 +499,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); From 720b287d8382616a29524dd98264ac1e3a069e9f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 May 2016 22:48:51 +0200 Subject: [PATCH 02/85] netfilter: conntrack: remove leftover binary sysctl define Users got removed in f8572d8f2a2ba ("sysctl net: Remove unused binary sysctl code"). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0f1a45bcacb2..2933db30098a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -489,8 +489,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { { } }; -#define NET_NF_CONNTRACK_MAX 2089 - static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", From dc3ee32e96d74dd6c80eed63af5065cb75899299 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 May 2016 21:18:52 -0500 Subject: [PATCH 03/85] netfilter: nf_queue: Make the queue_handler pernet Florian Weber reported: > Under full load (unshare() in loop -> OOM conditions) we can > get kernel panic: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > IP: [] nfqnl_nf_hook_drop+0x35/0x70 > [..] > task: ffff88012dfa3840 ti: ffff88012dffc000 task.ti: ffff88012dffc000 > RIP: 0010:[] [] nfqnl_nf_hook_drop+0x35/0x70 > RSP: 0000:ffff88012dfffd80 EFLAGS: 00010206 > RAX: 0000000000000008 RBX: ffffffff81add0c0 RCX: ffff88013fd80000 > [..] > Call Trace: > [] nf_queue_nf_hook_drop+0x18/0x20 > [] nf_unregister_net_hook+0xdb/0x150 > [] netfilter_net_exit+0x2f/0x60 > [] ops_exit_list.isra.4+0x38/0x60 > [] setup_net+0xc2/0x120 > [] copy_net_ns+0x79/0x120 > [] create_new_namespaces+0x11b/0x1e0 > [] unshare_nsproxy_namespaces+0x57/0xa0 > [] SyS_unshare+0x1b2/0x340 > [] entry_SYSCALL_64_fastpath+0x1e/0xa8 > Code: 65 00 48 89 e5 41 56 41 55 41 54 53 83 e8 01 48 8b 97 70 12 00 00 48 98 49 89 f4 4c 8b 74 c2 18 4d 8d 6e 08 49 81 c6 88 00 00 00 <49> 8b 5d 00 48 85 db 74 1a 48 89 df 4c 89 e2 48 c7 c6 90 68 47 > The simple fix for this requires a new pernet variable for struct nf_queue that indicates when it is safe to use the dynamically allocated nf_queue state. As we need a variable anyway make nf_register_queue_handler and nf_unregister_queue_handler pernet. This allows the existing logic of when it is safe to use the state from the nfnetlink_queue module to be reused with no changes except for making it per net. The syncrhonize_rcu from nf_unregister_queue_handler is moved to a new function nfnl_queue_net_exit_batch so that the worst case of having a syncrhonize_rcu in the pernet exit path is not experienced in batch mode. Reported-by: Florian Westphal Signed-off-by: "Eric W. Biederman" Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ++-- include/net/netns/netfilter.h | 2 ++ net/netfilter/nf_queue.c | 17 ++++++++--------- net/netfilter/nfnetlink_queue.c | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9c5638ad872e..0dbce55437f2 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -28,8 +28,8 @@ struct nf_queue_handler { struct nf_hook_ops *ops); }; -void nf_register_queue_handler(const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(void); +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 38aa4983e2a9..36d723579af2 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -5,11 +5,13 @@ struct proc_dir_entry; struct nf_logger; +struct nf_queue_handler; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif + const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5baa8e24e6ac..b19ad20a705c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -26,23 +26,21 @@ * Once the queue is registered it must reinject all packets it * receives, no matter what. */ -static const struct nf_queue_handler __rcu *queue_handler __read_mostly; /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -void nf_register_queue_handler(const struct nf_queue_handler *qh) +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ - WARN_ON(rcu_access_pointer(queue_handler)); - rcu_assign_pointer(queue_handler, qh); + WARN_ON(rcu_access_pointer(net->nf.queue_handler)); + rcu_assign_pointer(net->nf.queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -void nf_unregister_queue_handler(void) +void nf_unregister_queue_handler(struct net *net) { - RCU_INIT_POINTER(queue_handler, NULL); - synchronize_rcu(); + RCU_INIT_POINTER(net->nf.queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); @@ -103,7 +101,7 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) const struct nf_queue_handler *qh; rcu_read_lock(); - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (qh) qh->nf_hook_drop(net, ops); rcu_read_unlock(); @@ -122,9 +120,10 @@ int nf_queue(struct sk_buff *skb, struct nf_queue_entry *entry = NULL; const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; + struct net *net = state->net; /* QUEUE == DROP if no one is waiting, to be safe. */ - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (!qh) { status = -ESRCH; goto err; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e34256a0e7e9..309ac026aac2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1377,21 +1377,29 @@ static int __net_init nfnl_queue_net_init(struct net *net) net->nf.proc_netfilter, &nfqnl_file_ops)) return -ENOMEM; #endif + nf_register_queue_handler(net, &nfqh); return 0; } static void __net_exit nfnl_queue_net_exit(struct net *net) { + nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif } +static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) +{ + synchronize_rcu(); +} + static struct pernet_operations nfnl_queue_net_ops = { - .init = nfnl_queue_net_init, - .exit = nfnl_queue_net_exit, - .id = &nfnl_queue_net_id, - .size = sizeof(struct nfnl_queue_net), + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .exit_batch = nfnl_queue_net_exit_batch, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), }; static int __init nfnetlink_queue_init(void) @@ -1412,7 +1420,6 @@ static int __init nfnetlink_queue_init(void) } register_netdevice_notifier(&nfqnl_dev_notifier); - nf_register_queue_handler(&nfqh); return status; cleanup_netlink_notifier: @@ -1424,7 +1431,6 @@ out: static void __exit nfnetlink_queue_fini(void) { - nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); From 94abd778a7bb00ed5d00f56d9fbfcbf5b7c02a5c Mon Sep 17 00:00:00 2001 From: Jaap Jan Meijer Date: Thu, 12 May 2016 18:25:08 +0200 Subject: [PATCH 04/85] brcmfmac: add fallback for devices that do not report per-chain values If brcmf_cfg80211_get_station fails to determine the RSSI from the per-chain values get the value individually as a fallback. Fixes: 1f0dc59a6de9 ("brcmfmac: rework .get_station() callback") Signed-off-by: Jaap Jan Meijer Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index d0631b6cfd53..62f475e31077 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2540,12 +2540,14 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, const u8 *mac, struct station_info *sinfo) { struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_scb_val_le scb_val; s32 err = 0; struct brcmf_sta_info_le sta_info_le; u32 sta_flags; u32 is_tdls_peer; s32 total_rssi; s32 count_rssi; + int rssi; u32 i; brcmf_dbg(TRACE, "Enter, MAC %pM\n", mac); @@ -2629,6 +2631,20 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); total_rssi /= count_rssi; sinfo->signal = total_rssi; + } else if (test_bit(BRCMF_VIF_STATUS_CONNECTED, + &ifp->vif->sme_state)) { + memset(&scb_val, 0, sizeof(scb_val)); + err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_RSSI, + &scb_val, sizeof(scb_val)); + if (err) { + brcmf_err("Could not get rssi (%d)\n", err); + goto done; + } else { + rssi = le32_to_cpu(scb_val.val); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); + sinfo->signal = rssi; + brcmf_dbg(CONN, "RSSI %d dBm\n", rssi); + } } } done: From de26859dcf363d520cc44e59f6dcaf20ebe0aadf Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 21 May 2016 11:50:35 -0500 Subject: [PATCH 05/85] rtlwifi: Fix scheduling while atomic error from commit 49f86ec21c01 Commit 49f86ec21c01 ("rtlwifi: Change long delays to sleeps") was correct for most cases; however, driver rtl8192ce calls the affected routines while in atomic context. The kernel bug output is as follows: BUG: scheduling while atomic: wpa_supplicant/627/0x00000002 [...] [] __schedule+0x899/0xad0 [] schedule+0x3c/0x90 [] schedule_hrtimeout_range_clock+0xa2/0x120 [] ? hrtimer_init+0x120/0x120 [] ? schedule_hrtimeout_range_clock+0x96/0x120 [] schedule_hrtimeout_range+0x13/0x20 [] usleep_range+0x4f/0x70 [] rtl_rfreg_delay+0x38/0x50 [rtlwifi] [] rtl92c_phy_config_rf_with_headerfile+0xc7/0xe0 [rtl8192ce] To fix this bug, three of the changes from delay to sleep are reverted. Unfortunately, one of the changes involves a delay of 50 msec. The calling code will be modified so that this long delay can be avoided; however, this change is being pushed now to fix the problem in kernel 4.6.0. Fixes: 49f86ec21c01 ("rtlwifi: Change long delays to sleeps") Reported-by: James Feeney Signed-off-by: Larry Finger Cc: James Feeney Cc: Stable [4.6+] Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 0f48048b8654..3a0faa8fe9d4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL(channel5g_80m); void rtl_addr_delay(u32 addr) { if (addr == 0xfe) - msleep(50); + mdelay(50); else if (addr == 0xfd) msleep(5); else if (addr == 0xfc) @@ -75,7 +75,7 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr, rtl_addr_delay(addr); } else { rtl_set_rfreg(hw, rfpath, addr, mask, data); - usleep_range(1, 2); + udelay(1); } } EXPORT_SYMBOL(rtl_rfreg_delay); @@ -86,7 +86,7 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data) rtl_addr_delay(addr); } else { rtl_set_bbreg(hw, addr, MASKDWORD, data); - usleep_range(1, 2); + udelay(1); } } EXPORT_SYMBOL(rtl_bb_delay); From b7a8daa9f3d1688e994f5557577d3252c94ec282 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 14 May 2016 22:19:53 +0900 Subject: [PATCH 06/85] netfilter: nf_ct_helper: Fix helper unregister count. helpers should unregister the only registered ports. but, helper cannot have correct registered ports value when failed to register. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 1 + net/netfilter/nf_conntrack_irc.c | 1 + net/netfilter/nf_conntrack_sane.c | 1 + net/netfilter/nf_conntrack_sip.c | 1 + net/netfilter/nf_conntrack_tftp.c | 1 + 5 files changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 883c691ec8d0..19efeba02abb 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -632,6 +632,7 @@ static int __init nf_conntrack_ftp_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_ftp_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8b6da2719600..f97ac61d2536 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -271,6 +271,7 @@ static int __init nf_conntrack_irc_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_irc_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 7523a575f6d1..3fcbaab83b3d 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -223,6 +223,7 @@ static int __init nf_conntrack_sane_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sane_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3e06402739e0..f72ba5587588 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1669,6 +1669,7 @@ static int __init nf_conntrack_sip_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sip_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 36f964066461..2e65b5430fba 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -142,6 +142,7 @@ static int __init nf_conntrack_tftp_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", tftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_tftp_fini(); return ret; } From 83170f3beccccd7ceb4f9a0ac0c4dc736afde90c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 26 May 2016 19:08:10 +0200 Subject: [PATCH 07/85] netfilter: nf_dup_ipv6: set again FLOWI_FLAG_KNOWN_NH at flowi6_flags With the commit 48e8aa6e3137 ("ipv6: Set FLOWI_FLAG_KNOWN_NH at flowi6_flags") ip6_pol_route() callers were asked to to set the FLOWI_FLAG_KNOWN_NH properly and xt_TEE was updated accordingly, but with the later refactor in commit bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") the flowi6_flags update was lost. This commit re-add it just before the routing decision. Fixes: bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_dup_ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 6989c70ae29f..4a84b5ad9ecb 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, fl6.daddr = *gw; fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]); + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); From eaa2bcd6d1d410a52df8c7b05e76d86c0319b7b0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 27 May 2016 13:34:04 -0400 Subject: [PATCH 08/85] netfilter: nf_tables: validate NFTA_SET_TABLE parameter If the NFTA_SET_TABLE parameter is missing and the NLM_F_DUMP flag is not set, then a NULL pointer dereference is triggered in nf_tables_set_lookup because ctx.table is NULL. Signed-off-by: Phil Turnbull Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2011977cd79d..6947e255cdd8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2641,6 +2641,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, /* Only accept unspec with dump */ if (nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; + if (!nla[NFTA_SET_TABLE]) + return -EINVAL; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) From 893e093c786c4256d52809eed697e9d70a6f6643 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 May 2016 11:23:51 +0200 Subject: [PATCH 09/85] netfilter: nf_ct_helper: bail out on duplicated helpers Don't allow registration of helpers using the same tuple: { l3proto, l4proto, src-port } We lookup for the helper from the packet path using this tuple through __nf_ct_helper_find(). Therefore, we have to avoid having two helpers with the same tuple to ensure predictible behaviour. Don't compare the helper string names anymore since it is valid to register two helpers with the same name, but using different tuples. This is also implicitly fixing up duplicated helper registration via ports= modparam since the name comparison was defeating the tuple duplication validation. Reported-by: Feng Gao Reported-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3b40ec575cd5..48de9be45a9a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -361,9 +361,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { - int ret = 0; - struct nf_conntrack_helper *cur; + struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); + struct nf_conntrack_helper *cur; + int ret = 0; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); @@ -371,9 +372,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) mutex_lock(&nf_ct_helper_mutex); hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { - if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && - cur->tuple.src.l3num == me->tuple.src.l3num && - cur->tuple.dst.protonum == me->tuple.dst.protonum) { + if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EEXIST; goto out; } From 62397da50bb20a6b812c949ef465d7e69fe54bb6 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Fri, 13 May 2016 12:41:48 +0200 Subject: [PATCH 10/85] mac80211_hwsim: Add missing check for HWSIM_ATTR_SIGNAL A wmediumd that does not send this attribute causes a NULL pointer dereference, as the attribute is accessed even if it does not exist. The attribute was required but never checked ever since userspace frame forwarding has been introduced. The issue gets more problematic once we allow wmediumd registration from user namespaces. Cc: stable@vger.kernel.org Fixes: 7882513bacb1 ("mac80211_hwsim driver support userspace frame tx/rx") Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 9ed0ed1bf514..4dd5adcdd29b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2776,6 +2776,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || !info->attrs[HWSIM_ATTR_FLAGS] || !info->attrs[HWSIM_ATTR_COOKIE] || + !info->attrs[HWSIM_ATTR_SIGNAL] || !info->attrs[HWSIM_ATTR_TX_INFO]) goto out; From fe7a7c57629e8dcbc0e297363a9b2366d67a6dc5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 15 May 2016 13:19:16 -0400 Subject: [PATCH 11/85] mac80211: mesh: flush mesh paths unconditionally Currently, the mesh paths associated with a nexthop station are cleaned up in the following code path: __sta_info_destroy_part1 synchronize_net() __sta_info_destroy_part2 -> cleanup_single_sta -> mesh_sta_cleanup -> mesh_plink_deactivate -> mesh_path_flush_by_nexthop However, there are a couple of problems here: 1) the paths aren't flushed at all if the MPM is running in userspace (e.g. when using wpa_supplicant or authsae) 2) there is no synchronize_rcu between removing the path and readers accessing the nexthop, which means the following race is possible: CPU0 CPU1 ~~~~ ~~~~ sta_info_destroy_part1() synchronize_net() rcu_read_lock() mesh_nexthop_resolve() mpath = mesh_path_lookup() [...] -> mesh_path_flush_by_nexthop() sta = rcu_dereference( mpath->next_hop) kfree(sta) access sta <-- CRASH Fix both of these by unconditionally flushing paths before destroying the sta, and by adding a synchronize_net() after path flush to ensure no active readers can still dereference the sta. Fixes this crash: [ 348.529295] BUG: unable to handle kernel paging request at 00020040 [ 348.530014] IP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] *pde = 00000000 [ 348.530014] Oops: 0000 [#1] PREEMPT [ 348.530014] Modules linked in: drbg ansi_cprng ctr ccm ppp_generic slhc ipt_MASQUERADE nf_nat_masquerade_ipv4 8021q ] [ 348.530014] CPU: 0 PID: 20597 Comm: wget Tainted: G O 4.6.0-rc5-wt=V1 #1 [ 348.530014] Hardware name: To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080016 11/07/2014 [ 348.530014] task: f64fa280 ti: f4f9c000 task.ti: f4f9c000 [ 348.530014] EIP: 0060:[] EFLAGS: 00010246 CPU: 0 [ 348.530014] EIP is at ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] EAX: f4ce63e0 EBX: 00000088 ECX: f3788416 EDX: 00020008 [ 348.530014] ESI: 00000000 EDI: 00000088 EBP: f6409a4c ESP: f6409a40 [ 348.530014] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 348.530014] CR0: 80050033 CR2: 00020040 CR3: 33190000 CR4: 00000690 [ 348.530014] Stack: [ 348.530014] 00000000 f4ce63e0 f5f9bd80 f6409a64 f9291d80 0000ce67 f5d51e00 f4ce63e0 [ 348.530014] f3788416 f6409a80 f9291dc1 f4ce8320 f4ce63e0 f5d51e00 f4ce63e0 f4ce8320 [ 348.530014] f6409a98 f9277f6f 00000000 00000000 0000007c 00000000 f6409b2c f9278dd1 [ 348.530014] Call Trace: [ 348.530014] [] mesh_nexthop_lookup+0xbb/0xc8 [mac80211] [ 348.530014] [] mesh_nexthop_resolve+0x34/0xd8 [mac80211] [ 348.530014] [] ieee80211_xmit+0x92/0xc1 [mac80211] [ 348.530014] [] __ieee80211_subif_start_xmit+0x807/0x83c [mac80211] [ 348.530014] [] ? sch_direct_xmit+0xd7/0x1b3 [ 348.530014] [] ? __local_bh_enable_ip+0x5d/0x7b [ 348.530014] [] ? nf_nat_ipv4_out+0x4c/0xd0 [nf_nat_ipv4] [ 348.530014] [] ? iptable_nat_ipv4_fn+0xf/0xf [iptable_nat] [ 348.530014] [] ? netif_skb_features+0x14d/0x30a [ 348.530014] [] ieee80211_subif_start_xmit+0xa/0xe [mac80211] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] batadv_send_skb_packet+0xd6/0xec [batman_adv] [ 348.530014] [] batadv_send_unicast_skb+0x15/0x4a [batman_adv] [ 348.530014] [] batadv_dat_send_data+0x27e/0x310 [batman_adv] [ 348.530014] [] ? batadv_tt_global_hash_find.isra.11+0x8/0xa [batman_adv] [ 348.530014] [] batadv_dat_snoop_outgoing_arp_request+0x208/0x23d [batman_adv] [ 348.530014] [] batadv_interface_tx+0x206/0x385 [batman_adv] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] ? igb_xmit_frame+0x57/0x72 [igb] [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] br_dev_queue_push_xmit+0xeb/0xfb [bridge] [ 348.530014] [] br_forward_finish+0x29/0x74 [bridge] [ 348.530014] [] ? deliver_clone+0x3b/0x3b [bridge] [ 348.530014] [] __br_forward+0x89/0xe7 [bridge] [ 348.530014] [] ? br_dev_queue_push_xmit+0xfb/0xfb [bridge] [ 348.530014] [] deliver_clone+0x34/0x3b [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_flood+0x77/0x95 [bridge] [ 348.530014] [] br_flood_forward+0x13/0x1a [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_handle_frame_finish+0x392/0x3db [bridge] [ 348.530014] [] ? nf_iterate+0x2b/0x6b [ 348.530014] [] br_handle_frame+0x1e6/0x240 [bridge] [ 348.530014] [] ? br_handle_local_finish+0x6a/0x6a [bridge] [ 348.530014] [] __netif_receive_skb_core+0x43a/0x66b [ 348.530014] [] ? br_handle_frame_finish+0x3db/0x3db [bridge] [ 348.530014] [] ? resched_curr+0x19/0x37 [ 348.530014] [] ? check_preempt_wakeup+0xbf/0xfe [ 348.530014] [] ? ktime_get_with_offset+0x5c/0xfc [ 348.530014] [] __netif_receive_skb+0x47/0x55 [ 348.530014] [] netif_receive_skb_internal+0x40/0x5a [ 348.530014] [] napi_gro_receive+0x3a/0x94 [ 348.530014] [] igb_poll+0x6fd/0x9ad [igb] [ 348.530014] [] ? swake_up_locked+0x14/0x26 [ 348.530014] [] net_rx_action+0xde/0x250 [ 348.530014] [] __do_softirq+0x8a/0x163 [ 348.530014] [] ? __hrtimer_tasklet_trampoline+0x19/0x19 [ 348.530014] [] do_softirq_own_stack+0x26/0x2c [ 348.530014] [ 348.530014] [] irq_exit+0x31/0x6f [ 348.530014] [] do_IRQ+0x8d/0xa0 [ 348.530014] [] common_interrupt+0x2c/0x40 [ 348.530014] Code: e7 8c 00 66 81 ff 88 00 75 12 85 d2 75 0e b2 c3 b8 83 e9 29 f9 e8 a7 5f f9 c6 eb 74 66 81 e3 8c 005 [ 348.530014] EIP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] SS:ESP 0068:f6409a40 [ 348.530014] CR2: 0000000000020040 [ 348.530014] ---[ end trace 48556ac26779732e ]--- [ 348.530014] Kernel panic - not syncing: Fatal exception in interrupt [ 348.530014] Kernel Offset: disabled Cc: stable@vger.kernel.org Reported-by: Fred Veldini Tested-by: Fred Veldini Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4c6404e1ad6e..21b1fdf5d01d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->mesh->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } From 6fe04128f158c5ad27e7504bfdf1b12e63331bc9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 May 2016 17:34:38 +0200 Subject: [PATCH 12/85] mac80211: fix fast_tx header alignment The header field is defined as u8[] but also accessed as struct ieee80211_hdr. Enforce an alignment of 2 to prevent unnecessary unaligned accesses, which can be very harmful for performance on many platforms. Fixes: e495c24731a2 ("mac80211: extend fast-xmit for more ciphers") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c8b8ccc370eb..78b0ef32dddd 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -280,7 +280,7 @@ struct ieee80211_fast_tx { u8 sa_offs, da_offs, pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + - sizeof(rfc1042_header)]; + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; From bfa49cfc526201119623de6593d284c96563bede Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jun 2016 14:16:50 +0100 Subject: [PATCH 13/85] net/ethoc: fix null dereference on error exit path priv is assigned to NULL however some of the early error exit paths to label 'free' dereference priv, causing a null pointer dereference. Move the label 'free' to just the free_netdev statement, and add a new exit path 'free2' for the error cases were clk_disable_unprepare needs calling before the final free. Fixes issue found by CoverityScan, CID#113260 Signed-off-by: Colin Ian King Reviewed-by: Max Filippov Signed-off-by: David S. Miller --- drivers/net/ethernet/ethoc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 41b010645100..4edb98c3c6c7 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1195,7 +1195,7 @@ static int ethoc_probe(struct platform_device *pdev) priv->mdio = mdiobus_alloc(); if (!priv->mdio) { ret = -ENOMEM; - goto free; + goto free2; } priv->mdio->name = "ethoc-mdio"; @@ -1208,7 +1208,7 @@ static int ethoc_probe(struct platform_device *pdev) ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free; + goto free2; } ret = ethoc_mdio_probe(netdev); @@ -1241,9 +1241,10 @@ error2: error: mdiobus_unregister(priv->mdio); mdiobus_free(priv->mdio); -free: +free2: if (priv->clk) clk_disable_unprepare(priv->clk); +free: free_netdev(netdev); out: return ret; From 14b84e8654c89ed59f433654e6bb64c886d095cd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Jun 2016 15:29:13 +0200 Subject: [PATCH 14/85] qed: fix qed_fill_link() error handling gcc warns about qed_fill_link possibly accessing uninitialized data: drivers/net/ethernet/qlogic/qed/qed_main.c: In function 'qed_fill_link': drivers/net/ethernet/qlogic/qed/qed_main.c:1170:35: error: 'link_caps' may be used uninitialized in this function [-Werror=maybe-uninitialized] While this warning is only about the specific case of CONFIG_QED_SRIOV being disabled but the function getting called for a VF (which should never happen), another possibility is that qed_mcp_get_*() fails without returning data. This rearranges the code so we bail out in either of the two cases and print a warning instead of accessing the uninitialized data. The qed_link_output structure remains untouched in this case, but all callers first call memset() on it, so at least we are not leaking stack data then. As discussed, we also use a compile-time check to ensure we never use any of the VF code if CONFIG_QED_SRIOV is disabled, and the PCI device table is updated to no longer bind to virtual functions in that configuration. Signed-off-by: Arnd Bergmann Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 45 ++++++++++++++++---- drivers/net/ethernet/qlogic/qed/qed_sriov.h | 4 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 + 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 753064679bde..61cc6869fa65 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1105,6 +1105,39 @@ static int qed_get_port_type(u32 media_type) return port_type; } +static int qed_get_link_data(struct qed_hwfn *hwfn, + struct qed_mcp_link_params *params, + struct qed_mcp_link_state *link, + struct qed_mcp_link_capabilities *link_caps) +{ + void *p; + + if (!IS_PF(hwfn->cdev)) { + qed_vf_get_link_params(hwfn, params); + qed_vf_get_link_state(hwfn, link); + qed_vf_get_link_caps(hwfn, link_caps); + + return 0; + } + + p = qed_mcp_get_link_params(hwfn); + if (!p) + return -ENXIO; + memcpy(params, p, sizeof(*params)); + + p = qed_mcp_get_link_state(hwfn); + if (!p) + return -ENXIO; + memcpy(link, p, sizeof(*link)); + + p = qed_mcp_get_link_capabilities(hwfn); + if (!p) + return -ENXIO; + memcpy(link_caps, p, sizeof(*link_caps)); + + return 0; +} + static void qed_fill_link(struct qed_hwfn *hwfn, struct qed_link_output *if_link) { @@ -1116,15 +1149,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn, memset(if_link, 0, sizeof(*if_link)); /* Prepare source inputs */ - if (IS_PF(hwfn->cdev)) { - memcpy(¶ms, qed_mcp_get_link_params(hwfn), sizeof(params)); - memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link)); - memcpy(&link_caps, qed_mcp_get_link_capabilities(hwfn), - sizeof(link_caps)); - } else { - qed_vf_get_link_params(hwfn, ¶ms); - qed_vf_get_link_state(hwfn, &link); - qed_vf_get_link_caps(hwfn, &link_caps); + if (qed_get_link_data(hwfn, ¶ms, &link, &link_caps)) { + dev_warn(&hwfn->cdev->pdev->dev, "no link data available\n"); + return; } /* Set the link parameters to pass to protocol driver */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index c8667c65e685..c90b2b6ad969 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -12,11 +12,13 @@ #include "qed_vf.h" #define QED_VF_ARRAY_LENGTH (3) +#ifdef CONFIG_QED_SRIOV #define IS_VF(cdev) ((cdev)->b_is_vf) #define IS_PF(cdev) (!((cdev)->b_is_vf)) -#ifdef CONFIG_QED_SRIOV #define IS_PF_SRIOV(p_hwfn) (!!((p_hwfn)->cdev->p_iov_info)) #else +#define IS_VF(cdev) (0) +#define IS_PF(cdev) (1) #define IS_PF_SRIOV(p_hwfn) (0) #endif #define IS_PF_SRIOV_ALLOC(p_hwfn) (!!((p_hwfn)->pf_iov_info)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5d00d1404bfc..5733d1888223 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -87,7 +87,9 @@ static const struct pci_device_id qede_pci_tbl[] = { {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_100), QEDE_PRIVATE_PF}, {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_50), QEDE_PRIVATE_PF}, {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF}, +#ifdef CONFIG_QED_SRIOV {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF}, +#endif { 0 } }; From 7b7eba0f3515fca3296b8881d583f7c1042f5226 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Jun 2016 02:04:44 +0200 Subject: [PATCH 15/85] netfilter: x_tables: don't reject valid target size on some architectures Quoting John Stultz: In updating a 32bit arm device from 4.6 to Linus' current HEAD, I noticed I was having some trouble with networking, and realized that /proc/net/ip_tables_names was suddenly empty. Digging through the registration process, it seems we're catching on the: if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; Where next_offset seems to be 4 bytes larger then the offset + standard_target struct size. next_offset needs to be aligned via XT_ALIGN (so we can access all members of ip(6)t_entry struct). This problem didn't show up on i686 as it only needs 4-byte alignment for u64, but iptables userspace on other 32bit arches does insert extra padding. Reported-by: John Stultz Tested-by: John Stultz Fixes: 7ed2abddd20cf ("netfilter: x_tables: check standard target size too") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c69c892231d7..2675d580c490 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -612,7 +612,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; /* compat_xt_entry match has less strict aligment requirements, @@ -694,7 +694,7 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct xt_standard_target) != next_offset) + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) return -EINVAL; return xt_check_entry_match(elems, base + target_offset, From f55d84b07c4e7340473a25dc82b462607578402c Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 1 Jun 2016 08:53:48 -0700 Subject: [PATCH 16/85] stmmac: do not sleep in atomic context for mdio_reset stmmac_mdio_reset() has been updated to use msleep rather udelay (as some PHY requires a one second delay there). It called from stmmac_resume() within the spin_lock_irqsave block atomic context triggering 'scheduling while atomic'. The stmmac_priv lock usage is not fully documented, but it seems to protect the access to the MAC registers / DMA structures rather than the MDIO bus or the PHY (which have separate locking), so we can push the spin_lock after the stmmac_mdio_reset call. Signed-off-by: Vincent Palatin Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index eac45d0c75e2..a473c182c91d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3450,8 +3450,6 @@ int stmmac_resume(struct device *dev) if (!netif_running(ndev)) return 0; - spin_lock_irqsave(&priv->lock, flags); - /* Power Down bit, into the PM register, is cleared * automatically as soon as a magic packet or a Wake-up frame * is received. Anyway, it's better to manually clear @@ -3459,7 +3457,9 @@ int stmmac_resume(struct device *dev) * from another devices (e.g. serial console). */ if (device_may_wakeup(priv->device)) { + spin_lock_irqsave(&priv->lock, flags); priv->hw->mac->pmt(priv->hw, 0); + spin_unlock_irqrestore(&priv->lock, flags); priv->irq_wake = 0; } else { pinctrl_pm_select_default_state(priv->device); @@ -3473,6 +3473,8 @@ int stmmac_resume(struct device *dev) netif_device_attach(ndev); + spin_lock_irqsave(&priv->lock, flags); + priv->cur_rx = 0; priv->dirty_rx = 0; priv->dirty_tx = 0; From ce25d66ad5f8d921bac5fe2d32d62fa30c0f9a70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2016 14:52:43 -0700 Subject: [PATCH 17/85] Possible problem with e6afc8ac ("udp: remove headers from UDP packets before queueing") Paul Moore tracked a regression caused by a recent commit, which mistakenly assumed that sk_filter() could be avoided if socket had no current BPF filter. The intent was to avoid udp_lib_checksum_complete() overhead. But sk_filter() also checks skb_pfmemalloc() and security_sock_rcv_skb(), so better call it. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Reported-by: Paul Moore Tested-by: Paul Moore Tested-by: Stephen Smalley Cc: samanthakumar Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++++----- net/ipv6/udp.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d56c0559b477..0ff31d97d485 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1618,12 +1618,12 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2da1896af934..f421c9f23c5b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -653,12 +653,12 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto csum_error; + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { From 5d2be1422e02ccd697ccfcd45c85b4a26e6178e2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:04:56 -0400 Subject: [PATCH 18/85] tipc: fix an infoleak in tipc_nl_compat_link_dump link_info.str is a char array of size 60. Memory after the NULL byte is not initialized. Sending the whole object out can cause a leak. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f795b1dd0ccd..3ad9fab1985f 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); From 4116def2337991b39919f3b448326e21c40e0dbb Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:11:20 -0400 Subject: [PATCH 19/85] rds: fix an infoleak in rds_inc_info_copy The last field "flags" of object "minfo" is not initialized. Copying this object out may leak kernel stack data. Assign 0 to it to avoid leak. Signed-off-by: Kangjie Lu Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/recv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rds/recv.c b/net/rds/recv.c index c0be1ecd11c9..8413f6c99e13 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -561,5 +561,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); } From 31143e2933d1675c4c1ba6ce125cdd95870edd85 Mon Sep 17 00:00:00 2001 From: Franky Lin Date: Thu, 2 Jun 2016 02:00:27 -0700 Subject: [PATCH 20/85] brcmfmac: add eth_type_trans back for PCIe full dongle A regression was introduced in commit 9c349892ccc9 ("brcmfmac: revise handling events in receive path") which moves eth_type_trans() call to brcmf_rx_frame(). Msgbuf layer doesn't use brcmf_rx_frame() but invokes brcmf_netif_rx() directly. In such case the Ethernet header was not stripped out resulting in null pointer dereference in the networking stack. BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] enqueue_to_backlog+0x56/0x260 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: fuse ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype [...] rtsx_pci scsi_mod usbcore usb_common i8042 serio nvme nvme_core CPU: 7 PID: 1340 Comm: irq/136-brcmf_p Not tainted 4.7.0-rc1-mainline #1 Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 01.02.00 04/07/2016 task: ffff8804a0c5bd00 ti: ffff88049e124000 task.ti: ffff88049e124000 RIP: 0010:[] [] enqueue_to_backlog+0x56/0x260 RSP: 0018:ffff88049e127ca0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8804bddd7c40 RCX: 000000000000002f RDX: 0000000000000000 RSI: 0000000000000007 RDI: ffff8804bddd7d4c RBP: ffff88049e127ce8 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8804bddd12c0 R11: 000000000000149e R12: 0000000000017c40 R13: ffff88049e127d08 R14: ffff8804a9bd6d00 R15: ffff8804bddd7d4c FS: 0000000000000000(0000) GS:ffff8804bddc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000048 CR3: 0000000001806000 CR4: 00000000003406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: ffff8804bdddad00 ffff8804ad089e00 0000000000000000 0000000000000282 0000000000000000 ffff8804a9bd6d00 ffff8804a1b27e00 ffff8804a9bd6d00 ffff88002ee88000 ffff88049e127d28 ffffffff814c3f3b ffffffff81311fc3 Call Trace: [] netif_rx_internal+0x4b/0x170 [] ? swiotlb_tbl_unmap_single+0xf3/0x120 [] netif_rx_ni+0x27/0xc0 [] brcmf_netif_rx+0x49/0x70 [brcmfmac] [] brcmf_msgbuf_process_rx+0x2b4/0x570 [brcmfmac] [] ? __xen_set_pgd_hyper+0x57/0xd0 [] ? irq_forced_thread_fn+0x70/0x70 [] brcmf_proto_msgbuf_rx_trigger+0x31/0xe0 [brcmfmac] [] brcmf_pcie_isr_thread+0x7f/0x110 [brcmfmac] [] irq_thread_fn+0x20/0x50 [] irq_thread+0x12d/0x1c0 [] ? __schedule+0x2f5/0x7a0 [] ? wake_threads_waitq+0x30/0x30 [] ? irq_thread_dtor+0xb0/0xb0 [] kthread+0xd8/0xf0 [] ret_from_fork+0x1f/0x40 [] ? kthread_worker_fn+0x170/0x170 Code: 1c f5 60 9a 8e 81 9c 58 0f 1f 44 00 00 48 89 45 d0 fa 66 0f 1f 44 00 00 4c 8d bb 0c 01 00 00 4c 89 ff e8 5e 08 11 00 49 8b 56 20 <48> 8b 52 48 83 e2 01 74 10 8b 8b 08 01 00 00 8b 15 59 c5 42 00 RIP [] enqueue_to_backlog+0x56/0x260 RSP CR2: 0000000000000048 Fixes: 9c349892ccc9 ("brcmfmac: revise handling events in receive path") Reported-by: Rafal Milecki Reported-by: Grey Christoforo Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Reviewed-by: Hante Meuleman Signed-off-by: Franky Lin [arend@broadcom.com: rephrased the commit message] Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c index 68f1ce02f4bf..2b9a2bc429d6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c @@ -1157,6 +1157,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf) brcmu_pkt_buf_free_skb(skb); return; } + + skb->protocol = eth_type_trans(skb, ifp->ndev); brcmf_netif_rx(ifp, skb); } From 357cc9b4a8a7a0cd0e662537b76e6fa4670b6798 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:15 -0700 Subject: [PATCH 21/85] sch_hfsc: always keep backlog updated hfsc updates backlog lazily, that is only when we dump the stats. This is problematic after we begin to update backlog in qdisc_tree_reduce_backlog(). Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d783d7cc3348..1ac9f9f03fe3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1529,6 +1529,7 @@ hfsc_reset_qdisc(struct Qdisc *sch) q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -1559,14 +1560,6 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; - struct hfsc_class *cl; - unsigned int i; - - sch->qstats.backlog = 0; - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - sch->qstats.backlog += cl->qdisc->qstats.backlog; - } qopt.defcls = q->defcls; if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) @@ -1604,6 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl->qdisc->q.qlen == 1) set_active(cl, qdisc_pkt_len(skb)); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1672,6 +1666,7 @@ hfsc_dequeue(struct Qdisc *sch) qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; @@ -1695,6 +1690,7 @@ hfsc_drop(struct Qdisc *sch) } cl->qstats.drops++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } From 6529d75ad9228f4d8a8f6c5c5244ceb945ac9bc2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:16 -0700 Subject: [PATCH 22/85] sch_prio: update backlog as well We need to update backlog too when we update qlen. Joint work with Stas. Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fee1b15506b2..4b0a82191bc4 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -85,6 +85,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -117,6 +118,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -135,6 +137,7 @@ static unsigned int prio_drop(struct Qdisc *sch) for (prio = q->bands-1; prio >= 0; prio--) { qdisc = q->queues[prio]; if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -151,6 +154,7 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); + sch->qstats.backlog = 0; sch->q.qlen = 0; } From 6a73b571b63075ef408c83f07c2565b5652f93cc Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:17 -0700 Subject: [PATCH 23/85] sch_drr: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_drr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a63e879e8975..bf8af2c43c2c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -375,6 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = cl->quantum; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return err; } @@ -407,6 +408,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -428,6 +430,7 @@ static unsigned int drr_drop(struct Qdisc *sch) if (cl->qdisc->ops->drop) { len = cl->qdisc->ops->drop(cl->qdisc); if (len > 0) { + sch->qstats.backlog -= len; sch->q.qlen--; if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); @@ -463,6 +466,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } From d7f4f332f082c4d4ba53582f902ed6b44fd6f45e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:18 -0700 Subject: [PATCH 24/85] sch_red: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8c0508c0e287..91578bdd378c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,6 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -118,6 +119,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -143,6 +145,7 @@ static unsigned int red_drop(struct Qdisc *sch) if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -158,6 +161,7 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } From 8d5958f424b62060a8696b12c17dad198d5d386f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:19 -0700 Subject: [PATCH 25/85] sch_tbf: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 83b90b584fae..3161e491990b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -207,6 +207,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -217,6 +218,7 @@ static unsigned int tbf_drop(struct Qdisc *sch) unsigned int len = 0; if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; qdisc_qstats_drop(sch); } @@ -263,6 +265,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->t_c = now; q->tokens = toks; q->ptokens = ptoks; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); @@ -294,6 +297,7 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; From 9c8b0778e48e4bbdb77121c6c1b7dd48e5182e67 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Thu, 2 Jun 2016 17:36:28 +0800 Subject: [PATCH 26/85] gianfar: fix the last transmit buffer descriptor When the transmit hardware timestamping is enabled, an additional TxBD would be added and would be set as the last TxBD with TXBD_LAST and TXBD_INTERRUPT. However this has been broken by a patch recently. This made the software couldn't get transmit hardware timestamps and resulted in call trace. So, this patch is to fix this issue. Fixes: 48963b4492e9 ("gianfar: Remove redundant ops for do_tstamp from xmit()") Signed-off-by: Yangbo Lu Reviewed-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 7615e0668acb..2e6785b6e8be 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2440,7 +2440,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_queue->tx_ring_size); if (likely(!nr_frags)) { - lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT); + if (likely(!do_tstamp)) + lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT); } else { u32 lstatus_start = lstatus; From 8478b6cdc10e8a7735deeb9d9e46ad5b157c84d0 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 2 Jun 2016 16:14:52 +0300 Subject: [PATCH 27/85] net: ethernet: ti: cpsw: fix rx-usecs interrupt pacing consistency The rx-usecs shouldn't be changed while interface down/up. Currently, for instance, if it's set to 100us, after interface down/up it's 500us. It's a hidden bug that can lead to lavish interrupt pacing time increasing while "down/up" up to max value. Steps to reproduce: - set rx-usecs to be 100us - down/up interface - read new unexpected rx-usecs Signed-off-by: Ivan Khoronzhuk Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4b08a2f52b3e..e6bb0ecb12c7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1339,7 +1339,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (priv->coal_intvl != 0) { struct ethtool_coalesce coal; - coal.rx_coalesce_usecs = (priv->coal_intvl << 4); + coal.rx_coalesce_usecs = priv->coal_intvl; cpsw_set_coalesce(ndev, &coal); } From 207bdf1844286d2ba94523d30528f82aaf33d52b Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:06 +0200 Subject: [PATCH 28/85] net-next: mediatek: use mdiobus_free() in favour of kfree() The driver currently uses kfree() to clear the mii_bus. This is not the correct way to clear the memory and mdiobus_free() should be used instead. This patch fixes the two instances where this happens in the driver. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c984462fad2a..28f169f489da 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -280,7 +280,7 @@ static int mtk_mdio_init(struct mtk_eth *eth) return 0; err_free_bus: - kfree(eth->mii_bus); + mdiobus_free(eth->mii_bus); err_put_node: of_node_put(mii_np); @@ -295,7 +295,7 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) mdiobus_unregister(eth->mii_bus); of_node_put(eth->mii_bus->dev.of_node); - kfree(eth->mii_bus); + mdiobus_free(eth->mii_bus); } static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) From 08ef55c6f257acf3bdc6940813f80e8f0f5d90ec Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:07 +0200 Subject: [PATCH 29/85] net-next: mediatek: fix gigabit and flow control advertisement The current code will not setup the PHYs advertisement features correctly. Fix this and properly advertise Gigabit features and properly handle asymmetric pause frames. Signed-off-by: Sean Wang Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 30 ++++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 28f169f489da..b0652f6182f4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -133,6 +133,8 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); + u16 lcl_adv = 0, rmt_adv = 0; + u8 flowctrl; u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | @@ -150,11 +152,30 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (mac->phy_dev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) + if (mac->phy_dev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) - mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC; + if (mac->phy_dev->pause) + rmt_adv = LPA_PAUSE_CAP; + if (mac->phy_dev->asym_pause) + rmt_adv |= LPA_PAUSE_ASYM; + + if (mac->phy_dev->advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + + if (flowctrl & FLOW_CTRL_TX) + mcr |= MAC_MCR_FORCE_TX_FC; + if (flowctrl & FLOW_CTRL_RX) + mcr |= MAC_MCR_FORCE_RX_FC; + + netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n", + flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled", + flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled"); + } mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); @@ -236,7 +257,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) mac->phy_dev->autoneg = AUTONEG_ENABLE; mac->phy_dev->speed = 0; mac->phy_dev->duplex = 0; - mac->phy_dev->supported &= PHY_BASIC_FEATURES; + mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause; mac->phy_dev->advertising = mac->phy_dev->supported | ADVERTISED_Autoneg; phy_start_aneg(mac->phy_dev); From 0c72c50f6f93b0c3daa9ea35d89ab3a933c7b5a0 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:08 +0200 Subject: [PATCH 30/85] net-next: mediatek: add fixed-phy support The MT7623 SoC has a builtin gigabit switch. If we want to use it, GMAC1 needs to be configured using a fixed link speed and flow control settings. The easiest way to do this is to used the fixed-phy driver, allowing us to reuse the existing mdio polling code to setup the MAC. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b0652f6182f4..231d28444a54 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -229,6 +229,9 @@ static int mtk_phy_connect(struct mtk_mac *mac) u32 val, ge_mode; np = of_parse_phandle(mac->of_node, "phy-handle", 0); + if (!np && of_phy_is_fixed_link(mac->of_node)) + if (!of_phy_register_fixed_link(mac->of_node)) + np = of_node_get(mac->of_node); if (!np) return -ENODEV; From 37920fce0fc10264410eb880d411968b7934b61d Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:09 +0200 Subject: [PATCH 31/85] net-next: mediatek: properly handle RGMII modes If an external Gigabit PHY is connected to either of the MACs we need to be able to tell the PHY to use a delay. Not doing so will result in heavy packet loss and/or data corruption when using PHYs such as the IC+ IP1001. We tell the PHY which MII delay mode to use via the devictree. The ethernet driver needs to be adapted to handle all 3 rgmii-*id modes in the same way as normal rgmii when setting up the MAC. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 231d28444a54..4763252bbf85 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -236,6 +236,9 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: ge_mode = 0; break; From 182fd9eecb287e696c82b30d06c6150d80a49c5b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:06:10 +0200 Subject: [PATCH 32/85] MAINTAINERS: Add file patterns for wireless device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Kalle Valo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d397157981c..7e4734623579 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7851,6 +7851,7 @@ Q: http://patchwork.kernel.org/project/linux-wireless/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git S: Maintained +F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ NETXEN (1/10) GbE SUPPORT From 3ec10d3a2ba591c87da94219c1e46b02ae97757a Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Mon, 16 May 2016 19:18:09 +0200 Subject: [PATCH 33/85] ipvs: update real-server binding of outgoing connections in SIP-pe Previous patch that introduced handling of outgoing packets in SIP persistent-engine did not call ip_vs_check_template() in case packet was matching a connection template. Assumption was that real-server was healthy, since it was sending a packet just in that moment. There are however real-server fault conditions requiring that association between call-id and real-server (represented by connection template) gets updated. Here is an example of the sequence of events: 1) RS1 is a back2back user agent that handled call-id1 and call-id2 2) RS1 is down and was marked as unavailable 3) new message from outside comes to IPVS with call-id1 4) IPVS reschedules the message to RS2, which becomes new call handler 5) RS2 forwards the message outside, translating call-id1 to call-id2 6) inside pe->conn_out() IPVS matches call-id2 with existing template 7) IPVS does not change association call-id2 <-> RS1 8) new message comes from client with call-id2 9) IPVS reschedules the message to a real-server potentially different from RS2, which is now the correct destination This patch introduces ip_vs_check_template() call in the handling of outgoing packets for SIP-pe. And also introduces a second optional argument for ip_vs_check_template() that allows to check if dest associated to a connection template is the same dest that was identified as the source of the packet. This is to change the real-server bound to a particular call-id independently from its availability status: the idea is that it's more reliable, for in->out direction (where internal network can be considered trusted), to always associate a call-id with the last real-server that used it in one of its messages. Think about above sequence of events where, just after step 5, RS1 returns instead to be available. Comparison of dests is done by simply comparing pointers to struct ip_vs_dest; there should be no cases where struct ip_vs_dest keeps its memory address, but represent a different real-server in terms of ip-address / port. Fixes: 39b972231536 ("ipvs: handle connections started by real-servers") Signed-off-by: Marco Angaroni Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af4c10ebb241..cd6018a9ee24 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1232,7 +1232,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp); const char *ip_vs_state_name(__u16 proto, int state); void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); -int ip_vs_check_template(struct ip_vs_conn *ct); +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); void ip_vs_random_dropentry(struct netns_ipvs *ipvs); int ip_vs_conn_init(void); void ip_vs_conn_cleanup(void); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2cb3c626cd43..096a45103f14 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -762,7 +762,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, * If available, return 1, otherwise invalidate this connection * template and return 0. */ -int ip_vs_check_template(struct ip_vs_conn *ct) +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest) { struct ip_vs_dest *dest = ct->dest; struct netns_ipvs *ipvs = ct->ipvs; @@ -772,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - expire_quiescent_template(ipvs, dest)) { + expire_quiescent_template(ipvs, dest) || + (cdest && (dest != cdest))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1207f20d24e4..2c1b498a7a27 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -321,7 +321,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); - if (!ct || !ip_vs_check_template(ct)) { + if (!ct || !ip_vs_check_template(ct, NULL)) { struct ip_vs_scheduler *sched; /* @@ -1154,7 +1154,8 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, vport, ¶m) < 0) return NULL; ct = ip_vs_ct_in_get(¶m); - if (!ct) { + /* check if template exists and points to the same dest */ + if (!ct || !ip_vs_check_template(ct, dest)) { ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0); if (!ct) { From a02cc9d3cc9f98905df214d4a57e5918473260ea Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Jun 2016 15:32:18 +0200 Subject: [PATCH 34/85] bnx2x: allow adding VLANs while interface is down Since implementing VLAN filtering in commit 05cc5a39ddb74 ("bnx2x: add vlan filtering offload") bnx2x refuses to add a VLAN while the interface is down: # ip link add link enp3s0f0 enp3s0f0_10 type vlan id 10 RTNETLINK answers: Bad address and in dmesg (with bnx2x.debug=0x20): bnx2x: [bnx2x_vlan_rx_add_vid:12941(enp3s0f0)]Ignoring VLAN configuration the interface is down Other drivers have no problem with this. Fix this peculiar behavior in the following way: - Accept requests to add/kill VID regardless of the device state. Maintain the requested list of VIDs in the bp->vlan_reg list. - If the device is up, try to configure the VID list into the hardware. If we run out of VLAN credits or encounter a failure configuring an entry, fall back to accepting all VLANs. If we successfully configure all entries from the list, turn the fallback off. - Use the same code for reconfiguring VLANs during NIC load. Signed-off-by: Michal Schmidt Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 151 +++++++----------- 1 file changed, 62 insertions(+), 89 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c5fe915870ad..a59d55e25d5f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12895,52 +12895,71 @@ static int __bnx2x_vlan_configure_vid(struct bnx2x *bp, u16 vid, bool add) return rc; } -int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) +static int bnx2x_vlan_configure_vid_list(struct bnx2x *bp) { struct bnx2x_vlan_entry *vlan; int rc = 0; - if (!bp->vlan_cnt) { - DP(NETIF_MSG_IFUP, "No need to re-configure vlan filters\n"); - return 0; - } - + /* Configure all non-configured entries */ list_for_each_entry(vlan, &bp->vlan_reg, link) { - /* Prepare for cleanup in case of errors */ - if (rc) { - vlan->hw = false; - continue; - } - - if (!vlan->hw) + if (vlan->hw) continue; - DP(NETIF_MSG_IFUP, "Re-configuring vlan 0x%04x\n", vlan->vid); + if (bp->vlan_cnt >= bp->vlan_credit) + return -ENOBUFS; rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true); if (rc) { - BNX2X_ERR("Unable to configure VLAN %d\n", vlan->vid); - vlan->hw = false; - rc = -EINVAL; - continue; + BNX2X_ERR("Unable to config VLAN %d\n", vlan->vid); + return rc; } + + DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", vlan->vid); + vlan->hw = true; + bp->vlan_cnt++; } - return rc; + return 0; +} + +static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode) +{ + bool need_accept_any_vlan; + + need_accept_any_vlan = !!bnx2x_vlan_configure_vid_list(bp); + + if (bp->accept_any_vlan != need_accept_any_vlan) { + bp->accept_any_vlan = need_accept_any_vlan; + DP(NETIF_MSG_IFUP, "Accept all VLAN %s\n", + bp->accept_any_vlan ? "raised" : "cleared"); + if (set_rx_mode) { + if (IS_PF(bp)) + bnx2x_set_rx_mode_inner(bp); + else + bnx2x_vfpf_storm_rx_mode(bp); + } + } +} + +int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) +{ + struct bnx2x_vlan_entry *vlan; + + /* The hw forgot all entries after reload */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + bp->vlan_cnt = 0; + + /* Don't set rx mode here. Our caller will do it. */ + bnx2x_vlan_configure(bp, false); + + return 0; } static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_vlan_entry *vlan; - bool hw = false; - int rc = 0; - - if (!netif_running(bp->dev)) { - DP(NETIF_MSG_IFUP, - "Ignoring VLAN configuration the interface is down\n"); - return -EFAULT; - } DP(NETIF_MSG_IFUP, "Adding VLAN %d\n", vid); @@ -12948,93 +12967,47 @@ static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) if (!vlan) return -ENOMEM; - bp->vlan_cnt++; - if (bp->vlan_cnt > bp->vlan_credit && !bp->accept_any_vlan) { - DP(NETIF_MSG_IFUP, "Accept all VLAN raised\n"); - bp->accept_any_vlan = true; - if (IS_PF(bp)) - bnx2x_set_rx_mode_inner(bp); - else - bnx2x_vfpf_storm_rx_mode(bp); - } else if (bp->vlan_cnt <= bp->vlan_credit) { - rc = __bnx2x_vlan_configure_vid(bp, vid, true); - hw = true; - } - vlan->vid = vid; - vlan->hw = hw; + vlan->hw = false; + list_add_tail(&vlan->link, &bp->vlan_reg); - if (!rc) { - list_add(&vlan->link, &bp->vlan_reg); - } else { - bp->vlan_cnt--; - kfree(vlan); - } + if (netif_running(dev)) + bnx2x_vlan_configure(bp, true); - DP(NETIF_MSG_IFUP, "Adding VLAN result %d\n", rc); - - return rc; + return 0; } static int bnx2x_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_vlan_entry *vlan; + bool found = false; int rc = 0; - if (!netif_running(bp->dev)) { - DP(NETIF_MSG_IFUP, - "Ignoring VLAN configuration the interface is down\n"); - return -EFAULT; - } - DP(NETIF_MSG_IFUP, "Removing VLAN %d\n", vid); - if (!bp->vlan_cnt) { - BNX2X_ERR("Unable to kill VLAN %d\n", vid); - return -EINVAL; - } - list_for_each_entry(vlan, &bp->vlan_reg, link) - if (vlan->vid == vid) + if (vlan->vid == vid) { + found = true; break; + } - if (vlan->vid != vid) { + if (!found) { BNX2X_ERR("Unable to kill VLAN %d - not found\n", vid); return -EINVAL; } - if (vlan->hw) + if (netif_running(dev) && vlan->hw) { rc = __bnx2x_vlan_configure_vid(bp, vid, false); + DP(NETIF_MSG_IFUP, "HW deconfigured for VLAN %d\n", vid); + bp->vlan_cnt--; + } list_del(&vlan->link); kfree(vlan); - bp->vlan_cnt--; - - if (bp->vlan_cnt <= bp->vlan_credit && bp->accept_any_vlan) { - /* Configure all non-configured entries */ - list_for_each_entry(vlan, &bp->vlan_reg, link) { - if (vlan->hw) - continue; - - rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true); - if (rc) { - BNX2X_ERR("Unable to config VLAN %d\n", - vlan->vid); - continue; - } - DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", - vlan->vid); - vlan->hw = true; - } - DP(NETIF_MSG_IFUP, "Accept all VLAN Removed\n"); - bp->accept_any_vlan = false; - if (IS_PF(bp)) - bnx2x_set_rx_mode_inner(bp); - else - bnx2x_vfpf_storm_rx_mode(bp); - } + if (netif_running(dev)) + bnx2x_vlan_configure(bp, true); DP(NETIF_MSG_IFUP, "Removing VLAN result %d\n", rc); From fc100a7f89da85da8edd9c2e6f6e8b2490d74ae1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 19:19:20 +0200 Subject: [PATCH 35/85] soreuseport: Fix reuseport_bpf testcase on 32bit architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following compiler warnings when compiling the reuseport_bpf testcase on a 32 bit platform: reuseport_bpf.c: In function ‘attach_ebpf’: reuseport_bpf.c:114:15: warning: cast from pointer to integer of ifferent size [-Wpointer-to-int-cast] Signed-off-by: Helge Deller Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 96ba386b1b7b..4a8217448f20 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -111,9 +111,9 @@ static void attach_ebpf(int fd, uint16_t mod) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insn_cnt = ARRAY_SIZE(prog); - attr.insns = (uint64_t)prog; - attr.license = (uint64_t)bpf_license; - attr.log_buf = (uint64_t)bpf_log_buf; + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; attr.log_size = sizeof(bpf_log_buf); attr.log_level = 1; attr.kern_version = 0; @@ -351,8 +351,8 @@ static void test_filter_no_reuseport(const struct test_params p) memset(&eprog, 0, sizeof(eprog)); eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; eprog.insn_cnt = ARRAY_SIZE(ecode); - eprog.insns = (uint64_t)ecode; - eprog.license = (uint64_t)bpf_license; + eprog.insns = (unsigned long) &ecode; + eprog.license = (unsigned long) &bpf_license; eprog.kern_version = 0; memset(&cprog, 0, sizeof(cprog)); From 1957598840f47d42bb0b7f8a871717a780708686 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 23:49:17 +0200 Subject: [PATCH 36/85] soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF Commit 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") missed to add the compat case for the SO_ATTACH_REUSEPORT_CBPF option. Signed-off-by: Helge Deller Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/compat.c b/net/compat.c index 5cfd26a0006f..1373947efb50 100644 --- a/net/compat.c +++ b/net/compat.c @@ -354,7 +354,8 @@ static int do_set_sock_timeout(struct socket *sock, int level, static int compat_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - if (optname == SO_ATTACH_FILTER) + if (optname == SO_ATTACH_FILTER || + optname == SO_ATTACH_REUSEPORT_CBPF) return do_set_attach_filter(sock, level, optname, optval, optlen); if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) From a27758ffaf96f89002129eedb2cc172d254099f8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Jun 2016 15:05:57 -0700 Subject: [PATCH 37/85] net_sched: keep backlog updated with qlen For gso_skb we only update qlen, backlog should be updated too. Note, it is correct to just update these stats at one layer, because the gso_skb is cached there. Reported-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- net/sched/sch_generic.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c22a59..6803af17dfcf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -691,9 +691,11 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) + if (sch->gso_skb) { /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, sch->gso_skb); sch->q.qlen++; + } } return sch->gso_skb; @@ -706,6 +708,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { sch->gso_skb = NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { skb = sch->dequeue(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 269dd71b3828..f9e0e9c03d0a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -49,6 +49,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); @@ -92,6 +93,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; + qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } else skb = NULL; From 5b6c1b4d46b0dae4edea636a776d09f2064f4cd7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 4 Jun 2016 20:50:59 +0200 Subject: [PATCH 38/85] bpf, trace: use READ_ONCE for retrieving file ptr In bpf_perf_event_read() and bpf_perf_event_output(), we must use READ_ONCE() for fetching the struct file pointer, which could get updated concurrently, so we must prevent the compiler from potential refetching. We already do this with tail calls for fetching the related bpf_prog, but not so on stored perf events. Semantics for both are the same with regards to updates. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 780bcbe1d4de..720b7bb01d43 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -198,7 +198,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - file = (struct file *)array->ptrs[index]; + file = READ_ONCE(array->ptrs[index]); if (unlikely(!file)) return -ENOENT; @@ -247,7 +247,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - file = (struct file *)array->ptrs[index]; + file = READ_ONCE(array->ptrs[index]); if (unlikely(!file)) return -ENOENT; From 80e509db54c81247b32fcb75bb1730fc789b893d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Jun 2016 12:55:13 -0700 Subject: [PATCH 39/85] fq_codel: fix NET_XMIT_CN behavior My prior attempt to fix the backlogs of parents failed. If we return NET_XMIT_CN, our parents wont increase their backlog, so our qdisc_tree_reduce_backlog() should take this into account. v2: Florian Westphal pointed out that we could drop the packet, so we need to save qdisc_pkt_len(skb) in a temp variable before calling fq_codel_drop() Fixes: 9d18562a2278 ("fq_codel: add batch ability to fq_codel_drop()") Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Reported-by: Stas Nichiporovich Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6883a8971562..fff7867f4a4f 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -199,6 +199,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); + unsigned int pkt_len; bool memory_limited; idx = fq_codel_classify(skb, sch, &ret); @@ -230,6 +231,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) prev_backlog = sch->qstats.backlog; prev_qlen = sch->q.qlen; + /* save this packet length as it might be dropped by fq_codel_drop() */ + pkt_len = qdisc_pkt_len(skb); /* fq_codel_drop() is quite expensive, as it performs a linear search * in q->backlogs[] to find a fat flow. * So instead of dropping a single packet, drop half of its backlog @@ -237,14 +240,23 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ ret = fq_codel_drop(sch, q->drop_batch_size); - q->drop_overlimit += prev_qlen - sch->q.qlen; + prev_qlen -= sch->q.qlen; + prev_backlog -= sch->qstats.backlog; + q->drop_overlimit += prev_qlen; if (memory_limited) - q->drop_overmemory += prev_qlen - sch->q.qlen; - /* As we dropped packet(s), better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + q->drop_overmemory += prev_qlen; - return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; + /* As we dropped packet(s), better let upper stack know this. + * If we dropped a packet for this flow, return NET_XMIT_CN, + * but in this case, our parents wont increase their backlogs. + */ + if (ret == idx) { + qdisc_tree_reduce_backlog(sch, prev_qlen - 1, + prev_backlog - pkt_len); + return NET_XMIT_CN; + } + qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog); + return NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() From 335b48d980f631fbc5b233cbb3625ac0c86d67cb Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:58 -0700 Subject: [PATCH 40/85] RDS: TCP: Add/use rds_tcp_reset_callbacks to reset tcp socket safely When rds_tcp_accept_one() has to replace the existing tcp socket with a newer tcp socket (duelling-syn resolution), it must lock_sock() to suppress the rds_tcp_data_recv() path while callbacks are being changed. Also, existing RDS datagram reassembly state must be reset, so that the next datagram on the new socket does not have corrupted state. Similarly when resetting the newly accepted socket, appropriate locks and synchronization is needed. This commit ensures correct synchronization by invoking kernel_sock_shutdown to reset a newly accepted sock, and by taking appropriate lock_sock()s (for old and new sockets) when resetting existing callbacks. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 65 ++++++++++++++++++++++++++++++++++++++++++-- net/rds/tcp.h | 1 + net/rds/tcp_listen.c | 13 +++------ 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 86187dad1440..8faa0b1ae39d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -126,9 +126,68 @@ void rds_tcp_restore_callbacks(struct socket *sock, } /* - * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_UP bit protects those paths from being - * called while it isn't set. + * rds_tcp_reset_callbacks() switches the to the new sock and + * returns the existing tc->t_sock. + * + * The only functions that set tc->t_sock are rds_tcp_set_callbacks + * and rds_tcp_reset_callbacks. Send and receive trust that + * it is set. The absence of RDS_CONN_UP bit protects those paths + * from being called while it isn't set. + */ +void rds_tcp_reset_callbacks(struct socket *sock, + struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *osock = tc->t_sock; + + if (!osock) + goto newsock; + + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then + * waiting for any existing threads in rds_send_xmit to + * complete release_in_xmit(). (Subsequent threads entering + * rds_send_xmit() will bail on !rds_conn_up(). + */ + lock_sock(osock->sk); + /* reset receive side state for rds_tcp_data_recv() for osock */ + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_sock = NULL; + + write_lock_bh(&osock->sk->sk_callback_lock); + + osock->sk->sk_user_data = NULL; + osock->sk->sk_data_ready = tc->t_orig_data_ready; + osock->sk->sk_write_space = tc->t_orig_write_space; + osock->sk->sk_state_change = tc->t_orig_state_change; + write_unlock_bh(&osock->sk->sk_callback_lock); + release_sock(osock->sk); + sock_release(osock); +newsock: + lock_sock(sock->sk); + write_lock_bh(&sock->sk->sk_callback_lock); + tc->t_sock = sock; + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); + release_sock(sock->sk); +} + +/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments + * above rds_tcp_reset_callbacks for notes about synchronization + * with data path */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) { diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 41c228300525..ec0602b0dc24 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,6 +50,7 @@ struct rds_tcp_statistics { void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4bf4befe5066..d9fe53675d95 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; - struct sock *nsk; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -139,23 +138,19 @@ int rds_tcp_accept_one(struct socket *sock) atomic_set(&conn->c_state, RDS_CONN_CONNECTING); wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; } + } else { + rds_tcp_set_callbacks(new_sock, conn); } - rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; rst_nsk: /* reset the newly returned accept sock and bail */ - nsk = new_sock->sk; - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; + kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: if (rs_tcp) From 0b6f760cff04a7cdfafc3ec6915e91fed0533d8d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:59 -0700 Subject: [PATCH 41/85] RDS: TCP: Retransmit half-sent datagrams when switching sockets in rds_tcp_reset_callbacks When we switch a connection's sockets in rds_tcp_rest_callbacks, any partially sent datagram must be retransmitted on the new socket so that the receiver can correctly reassmble the RDS datagram. Use rds_send_reset() which is designed for this purpose. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/send.c | 1 + net/rds/tcp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/rds/send.c b/net/rds/send.c index c9cdb358ea88..b1962f8e30f7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn) list_splice_init(&conn->c_retrans, &conn->c_send_queue); spin_unlock_irqrestore(&conn->c_lock, flags); } +EXPORT_SYMBOL_GPL(rds_send_reset); static int acquire_in_xmit(struct rds_connection *conn) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 8faa0b1ae39d..7ab1b41ffc88 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -173,6 +173,7 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: + rds_send_reset(conn); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; From 9c79440e2c5e2518879f1599270f64c3ddda3baf Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 14:00:00 -0700 Subject: [PATCH 42/85] RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one() The send path needs to be quiesced before resetting callbacks from rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves this using the c_state and RDS_IN_XMIT bit following the pattern used by rds_conn_shutdown(). However this leaves the possibility of a race window as shown in the sequence below take t_conn_lock in rds_tcp_conn_connect send outgoing syn to peer drop t_conn_lock in rds_tcp_conn_connect incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads call rds_tcp_reset_callbacks [.. race-window where incoming syn-ack can cause the conn to be marked UP from rds_tcp_state_change ..] lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to null As soon as the conn is marked UP in the race-window above, rds_send_xmit() threads will proceed to rds_tcp_xmit and may encounter a null-pointer deref on the t_sock. Given that rds_tcp_state_change() is invoked in softirq context, whereas rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT after lock_sock could result in a deadlock with tcp_sendmsg, this commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which will prevent a transition to RDS_CONN_UP from rds_tcp_state_change(). Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rds.h | 2 ++ net/rds/tcp.c | 14 +++++++++++++- net/rds/tcp_connect.c | 2 +- net/rds/tcp_listen.c | 7 +++---- net/rds/threads.c | 10 ++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/net/rds/rds.h b/net/rds/rds.h index 80256b08eac0..387df5f32e49 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -74,6 +74,7 @@ enum { RDS_CONN_CONNECTING, RDS_CONN_DISCONNECTING, RDS_CONN_UP, + RDS_CONN_RESETTING, RDS_CONN_ERROR, }; @@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); void rds_recv_worker(struct work_struct *); +void rds_connect_path_complete(struct rds_connection *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 7ab1b41ffc88..74ee126a6fe6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock, * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting * c_transport_data. We quiesce these threads by setting - * cp_state to something other than RDS_CONN_UP, and then + * c_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). + * + * However an incoming syn-ack at this point would end up + * marking the conn as RDS_CONN_UP, and would again permit + * rds_send_xmi() threads through, so ideally we would + * synchronize on RDS_CONN_UP after lock_sock(), but cannot + * do that: waiting on !RDS_IN_XMIT after lock_sock() may + * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT + * would not get set. As a result, we set c_state to + * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change + * cannot mark rds_conn_path_up() in the window before lock_sock() */ + atomic_set(&conn->c_state, RDS_CONN_RESETTING); + wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fb82e0a0bf89..fba13d0305fb 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_complete(conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d9fe53675d95..686b1d03a558 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock) !conn->c_outgoing) { goto rst_nsk; } else { - atomic_set(&conn->c_state, RDS_CONN_CONNECTING); - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); } } else { rds_tcp_set_callbacks(new_sock, conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); } - rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; diff --git a/net/rds/threads.c b/net/rds/threads.c index 454aa6d23327..4a323045719b 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -71,9 +71,9 @@ struct workqueue_struct *rds_wq; EXPORT_SYMBOL_GPL(rds_wq); -void rds_connect_complete(struct rds_connection *conn) +void rds_connect_path_complete(struct rds_connection *conn, int curr) { - if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { + if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) { printk(KERN_WARNING "%s: Cannot transition to state UP, " "current state is %d\n", __func__, @@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn) queue_delayed_work(rds_wq, &conn->c_send_w, 0); queue_delayed_work(rds_wq, &conn->c_recv_w, 0); } +EXPORT_SYMBOL_GPL(rds_connect_path_complete); + +void rds_connect_complete(struct rds_connection *conn) +{ + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); +} EXPORT_SYMBOL_GPL(rds_connect_complete); /* From fa54cc70ed2eddd96773ffdf4d96d23ea7483e56 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 5 Jun 2016 07:17:19 +0800 Subject: [PATCH 43/85] rxrpc: fix ptr_ret.cocci warnings net/rxrpc/rxkad.c:1165:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci CC: David Howells Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/rxrpc/rxkad.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6b726a046a7d..bab56ed649ba 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1162,9 +1162,7 @@ static int rxkad_init(void) /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(rxkad_ci)) - return PTR_ERR(rxkad_ci); - return 0; + return PTR_ERR_OR_ZERO(rxkad_ci); } /* From b9a8460a08a1e0150073cda3e7a0dd23cb888052 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:14 -0400 Subject: [PATCH 44/85] bnxt_en: Fix tx push race condition. Set the is_push flag in the software BD before the tx data is pushed to the chip. It is possible to get the tx interrupt as soon as the tx data is pushed. The tx handler will not handle the event properly if the is_push flag is not set and it will crash. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 72a2efff8e49..4615ed4311e8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -286,7 +286,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); txr->tx_prod = prod; + tx_buf->is_push = 1; netdev_tx_sent_queue(txq, skb->len); + wmb(); /* Sync is_push and byte queue before pushing data */ push_len = (length + sizeof(*tx_push) + 7) / 8; if (push_len > 16) { @@ -298,7 +300,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) push_len); } - tx_buf->is_push = 1; goto tx_done; } From 5a9f6b238e59bc05afb4cdeaf3672990bf2a5309 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:15 -0400 Subject: [PATCH 45/85] bnxt_en: Enable and disable RX CTAG and RX STAG VLAN acceleration together. The hardware can only be set to strip or not strip both the VLAN CTAG and STAG. It cannot strip one and not strip the other. Add logic to bnxt_fix_features() to toggle both feature flags when the user is toggling one of them. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4615ed4311e8..ae2b2646604a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5467,6 +5467,20 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, if (!bnxt_rfs_capable(bp)) features &= ~NETIF_F_NTUPLE; + + /* Both CTAG and STAG VLAN accelaration on the RX side have to be + * turned on or off together. + */ + if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) != + (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + features &= ~(NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX); + else + features |= NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX; + } + return features; } From 8852ddb4dcdfe6f877a02f79bf2bca9ae63c039a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:16 -0400 Subject: [PATCH 46/85] bnxt_en: Simplify VLAN receive logic. Since both CTAG and STAG rx acceleration must be enabled together, we only need to check one feature flag (NETIF_F_HW_VLAN_CTAG_RX) before calling __vlan_hwaccel_put_tag(). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 +++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ae2b2646604a..c777cde85ce4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1113,19 +1113,13 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (tpa_info->hash_type != PKT_HASH_TYPE_NONE) skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type); - if (tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) { - netdev_features_t features = skb->dev->features; + if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && + (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vlan_proto = tpa_info->metadata >> RX_CMP_FLAGS2_METADATA_TPID_SFT; + u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK; - if (((features & NETIF_F_HW_VLAN_CTAG_RX) && - vlan_proto == ETH_P_8021Q) || - ((features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_proto == ETH_P_8021AD)) { - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), - tpa_info->metadata & - RX_CMP_FLAGS2_METADATA_VID_MASK); - } + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } skb_checksum_none_assert(skb); @@ -1278,19 +1272,14 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, skb->protocol = eth_type_trans(skb, dev); - if (rxcmp1->rx_cmp_flags2 & - cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) { - netdev_features_t features = skb->dev->features; + if ((rxcmp1->rx_cmp_flags2 & + cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && + (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); + u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK; u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; - if (((features & NETIF_F_HW_VLAN_CTAG_RX) && - vlan_proto == ETH_P_8021Q) || - ((features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_proto == ETH_P_8021AD)) - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), - meta_data & - RX_CMP_FLAGS2_METADATA_VID_MASK); + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } skb_checksum_none_assert(skb); From 9f647a6de9926f4844feb8c5e21c78d4e61c55ab Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jun 2016 09:21:44 +0100 Subject: [PATCH 47/85] net: fec: fix spelling mistakes and add missing newline trivial fix to spelling mistakes and add missing newline in pr_err messages Signed-off-by: Colin Ian King Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3c0255e98535..fea0f330ddbd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2416,24 +2416,24 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames > 255) { - pr_err("Rx coalesced frames exceed hardware limiation"); + pr_err("Rx coalesced frames exceed hardware limitation\n"); return -EINVAL; } if (ec->tx_max_coalesced_frames > 255) { - pr_err("Tx coalesced frame exceed hardware limiation"); + pr_err("Tx coalesced frame exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesed usec exceeed hardware limiation"); + pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesed usec exceeed hardware limiation"); + pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } From 7b01b8e847d00cf9cf0c2c3aa8fdfc4126dca024 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jun 2016 16:08:41 +0100 Subject: [PATCH 48/85] gtp: #define _UAPI_LINUX_GTP_H_ and not _UAPI_LINUX_GTP_H__ Fix clang build warning: ./include/uapi/linux/gtp.h:1:9: warning: '_UAPI_LINUX_GTP_H_' is used as a header guard here, followed by #define of a different macro [-Wheader-guard] fix by defining _UAPI_LINUX_GTP_H_ and not _UAPI_LINUX_GTP_H__ Signed-off-by: Colin Ian King Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/uapi/linux/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index ca1054dd8249..72a04a0e8cce 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -1,5 +1,5 @@ #ifndef _UAPI_LINUX_GTP_H_ -#define _UAPI_LINUX_GTP_H__ +#define _UAPI_LINUX_GTP_H_ enum gtp_genl_cmds { GTP_CMD_NEWPDP, From 1a0f7d2984f3864e64a43714b4a0999b5a27cff5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:47 +0100 Subject: [PATCH 49/85] net: cls_u32: fix error code for invalid flags 'err' variable is not set in this test, we would return whatever previous test set 'err' to. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 079b43b3c5d2..b17e090f2fe1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -863,7 +863,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); if (!tc_flags_valid(flags)) - return err; + return -EINVAL; } n = (struct tc_u_knode *)*arg; From d47a0f387fe907bdb0430a398850c1cb80eb7def Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:48 +0100 Subject: [PATCH 50/85] net: cls_u32: be more strict about skip-sw flag Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b17e090f2fe1..fe05449537a3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -457,20 +457,21 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; + if (!tc_should_offload(dev, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } From aafddbf0cffeb790f919436285328c762279b5d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:12:39 -0700 Subject: [PATCH 51/85] fq_codel: return non zero qlen in class dumps We properly scan the flow list to count number of packets, but John passed 0 to gnet_stats_copy_queue() so we report a zero value to user space instead of the result. Fixes: 640158536632 ("net: sched: restrict use of qstats qlen") Signed-off-by: Eric Dumazet Cc: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index fff7867f4a4f..da250b2e06ae 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -661,7 +661,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, qs.backlog = q->backlogs[idx]; qs.drops = flow->dropped; } - if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0) + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (idx < q->flows_cnt) return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); From a03e6fe569713fb3ff0714f8fd7c8785c0ca9e22 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 6 Jun 2016 09:54:30 -0700 Subject: [PATCH 52/85] act_police: fix a crash during removal The police action is using its own code to initialize tcf hash info, which makes us to forgot to initialize a->hinfo correctly. Fix this by calling the helper function tcf_hash_create() directly. This patch fixed the following crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] __lock_acquire+0xd3/0xf91 PGD d3c34067 PUD d3e18067 PMD 0 Oops: 0000 [#1] SMP CPU: 2 PID: 853 Comm: tc Not tainted 4.6.0+ #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800d3e28040 ti: ffff8800d3f6c000 task.ti: ffff8800d3f6c000 RIP: 0010:[] [] __lock_acquire+0xd3/0xf91 RSP: 0000:ffff88011b203c80 EFLAGS: 00010002 RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000028 RBP: ffff88011b203d40 R08: 0000000000000001 R09: 0000000000000000 R10: ffff88011b203d58 R11: ffff88011b208000 R12: 0000000000000001 R13: ffff8800d3e28040 R14: 0000000000000028 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88011b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 00000000d4be1000 CR4: 00000000000006e0 Stack: ffff8800d3e289c0 0000000000000046 000000001b203d60 ffffffff00000000 0000000000000000 ffff880000000000 0000000000000000 ffffffff00000000 ffffffff8187142c ffff88011b203ce8 ffff88011b203ce8 ffffffff8101dbfc Call Trace: [] ? __tcf_hash_release+0x77/0xd1 [] ? native_sched_clock+0x1a/0x35 [] ? native_sched_clock+0x1a/0x35 [] ? sched_clock_local+0x11/0x78 [] ? mark_lock+0x24/0x201 [] lock_acquire+0x120/0x1b4 [] ? lock_acquire+0x120/0x1b4 [] ? __tcf_hash_release+0x77/0xd1 [] _raw_spin_lock_bh+0x3c/0x72 [] ? __tcf_hash_release+0x77/0xd1 [] __tcf_hash_release+0x77/0xd1 [] tcf_action_destroy+0x49/0x7c [] tcf_exts_destroy+0x20/0x2d [] u32_destroy_key+0x1b/0x4d [] u32_delete_key_freepf_rcu+0x1b/0x1d [] rcu_process_callbacks+0x610/0x82e [] ? u32_destroy_key+0x4d/0x4d [] __do_softirq+0x191/0x3f4 Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_police.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b884dae692a1..c557789765dc 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -38,7 +38,7 @@ struct tcf_police { bool peak_present; }; #define to_police(pc) \ - container_of(pc, struct tcf_police, common) + container_of(pc->priv, struct tcf_police, common) #define POL_TAB_MASK 15 @@ -119,14 +119,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { - unsigned int h; int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; int size; if (nla == NULL) @@ -145,7 +143,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (parm->index) { if (tcf_hash_search(tn, a, parm->index)) { - police = to_police(a->priv); + police = to_police(a); if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; @@ -156,16 +154,15 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, /* not replacing */ return -EEXIST; } + } else { + ret = tcf_hash_create(tn, parm->index, NULL, a, + sizeof(*police), bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (police == NULL) - return -ENOMEM; - ret = ACT_P_CREATED; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (bind) - police->tcf_bindcnt = 1; + police = to_police(a); override: if (parm->rate.rate) { err = -ENOMEM; @@ -237,16 +234,8 @@ override: return ret; police->tcfp_t_c = ktime_get_ns(); - police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(tn); - police->tcf_tm.install = jiffies; - police->tcf_tm.lastuse = jiffies; - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - spin_lock_bh(&hinfo->lock); - hlist_add_head(&police->tcf_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + tcf_hash_insert(tn, a); - a->priv = police; return ret; failure_unlock: @@ -255,7 +244,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - kfree(police); + tcf_hash_cleanup(a, est); return err; } From 92c075dbdeed02bdf293cb0f513bad70aa714b8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jun 2016 22:50:39 +0200 Subject: [PATCH 53/85] net: sched: fix tc_should_offload for specific clsact classes When offloading classifiers such as u32 or flower to hardware, and the qdisc is clsact (TC_H_CLSACT), then we need to differentiate its classes, since not all of them handle ingress, therefore we must leave those in software path. Add a .tcf_cl_offload() callback, so we can generically handle them, tested on ixgbe. Fixes: 10cbc6843446 ("net/sched: cls_flower: Hardware offloaded filters statistics support") Fixes: 5b33f48842fa ("net/flower: Introduce hardware offload support") Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 10 +++++++--- include/net/sch_generic.h | 1 + net/sched/cls_flower.c | 6 +++--- net/sched/cls_u32.c | 8 ++++---- net/sched/sch_ingress.c | 12 ++++++++++++ 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f7efa88f210..3722dda0199d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -392,16 +392,20 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(struct net_device *dev, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) { + const struct Qdisc *sch = tp->q; + const struct Qdisc_class_ops *cops = sch->ops->cl_ops; + if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) return false; - if (!dev->netdev_ops->ndo_setup_tc) return false; + if (cops && cops->tcf_cl_offload) + return cops->tcf_cl_offload(tp->classid); return true; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6803af17dfcf..62d553184e91 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -168,6 +168,7 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 730aacafc22d..b3b7978f4182 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -171,7 +171,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_DESTROY; @@ -194,7 +194,7 @@ static void fl_hw_replace_filter(struct tcf_proto *tp, struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return; offload.command = TC_CLSFLOWER_REPLACE; @@ -216,7 +216,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_STATS; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index fe05449537a3..27b99fd774d7 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -440,7 +440,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, @@ -457,7 +457,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; offload.type = TC_SETUP_CLSU32; @@ -485,7 +485,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; @@ -508,7 +508,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { + if (tc_should_offload(dev, tp, flags)) { offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; offload.cls_u32->knode.handle = n->handle; offload.cls_u32->knode.fshift = n->fshift; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 10adbc617905..8fe6999b642a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -27,6 +27,11 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid) return TC_H_MIN(classid) + 1; } +static bool ingress_cl_offload(u32 classid) +{ + return true; +} + static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -86,6 +91,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = ingress_find_tcf, + .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, }; @@ -110,6 +116,11 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } +static bool clsact_cl_offload(u32 classid) +{ + return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); +} + static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -158,6 +169,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = clsact_find_tcf, + .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, }; From ce3cf4ec0305919fc69a972f6c2b2efd35d36abc Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 6 Jun 2016 15:07:18 -0700 Subject: [PATCH 54/85] tcp: record TLP and ER timer stats in v6 stats The v6 tcp stats scan do not provide TLP and ER timer information correctly like the v4 version . This patch fixes that. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 79e33e02f11a..f36c2d076fce 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1721,7 +1721,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { From 0b148def403153a4d1565f1640356cb78ce5109f Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 7 Jun 2016 19:14:17 +0900 Subject: [PATCH 55/85] bridge: Don't insert unnecessary local fdb entry on changing mac address The missing br_vlan_should_use() test caused creation of an unneeded local fdb entry on changing mac address of a bridge device when there is a vlan which is configured on a bridge port but not on the bridge device. Fixes: 2594e9064a57 ("bridge: vlan: add per-vlan struct and move to rhashtables") Signed-off-by: Toshiaki Makita Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index dcea4f4c62b3..c18080ad4085 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -279,6 +279,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); From 88832a22d6bb50e3b5f9d5ecc6cf26707c35f322 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 19:27:51 +0100 Subject: [PATCH 56/85] net-sysfs: fix missing The of_find_net_device_by_node() function is defined in but not included in the .c file that implements it. Fix the following warning by including the header: net/core/net-sysfs.c:1494:19: warning: symbol 'of_find_net_device_by_node' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2b3f76fe65f4..7a0b616557ab 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "net-sysfs.h" From c0530dd3ef1d3997d885e1345e34a284db2b9cfa Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 8 Jun 2016 14:57:28 +0530 Subject: [PATCH 57/85] cxgb4: Add device id of T540-BT adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index a2cdfc1261dc..50812a1d67bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -144,6 +144,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */ CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */ CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */ + CH_PCI_ID_TABLE_FENTRY(0x5018), /* T540-BT */ CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */ CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */ CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */ From a5c5e2da8551eb69e5d5d09d51d526140b5db9fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Jun 2016 12:59:17 +0200 Subject: [PATCH 58/85] l2tp: fix configuration passed to setup_udp_tunnel_sock() Unused fields of udp_cfg must be all zeros. Otherwise setup_udp_tunnel_sock() fills ->gro_receive and ->gro_complete callbacks with garbage, eventually resulting in panic when used by udp_gro_receive(). [ 72.694123] BUG: unable to handle kernel paging request at ffff880033f87d78 [ 72.695518] IP: [] 0xffff880033f87d78 [ 72.696530] PGD 26e2067 PUD 26e3067 PMD 342ed063 PTE 8000000033f87163 [ 72.696530] Oops: 0011 [#1] SMP KASAN [ 72.696530] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pptp gre pppox ppp_generic slhc crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel evdev aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper serio_raw acpi_cpufreq button proc\ essor ext4 crc16 jbd2 mbcache virtio_blk virtio_net virtio_pci virtio_ring virtio [ 72.696530] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.7.0-rc1 #1 [ 72.696530] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 72.696530] task: ffff880035b59700 ti: ffff880035b70000 task.ti: ffff880035b70000 [ 72.696530] RIP: 0010:[] [] 0xffff880033f87d78 [ 72.696530] RSP: 0018:ffff880035f87bc0 EFLAGS: 00010246 [ 72.696530] RAX: ffffed000698f996 RBX: ffff88003326b840 RCX: ffffffff814cc823 [ 72.696530] RDX: ffff88003326b840 RSI: ffff880033e48038 RDI: ffff880034c7c780 [ 72.696530] RBP: ffff880035f87c18 R08: 000000000000a506 R09: 0000000000000000 [ 72.696530] R10: ffff880035f87b38 R11: ffff880034b9344d R12: 00000000ebfea715 [ 72.696530] R13: 0000000000000000 R14: ffff880034c7c780 R15: 0000000000000000 [ 72.696530] FS: 0000000000000000(0000) GS:ffff880035f80000(0000) knlGS:0000000000000000 [ 72.696530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.696530] CR2: ffff880033f87d78 CR3: 0000000033c98000 CR4: 00000000000406a0 [ 72.696530] Stack: [ 72.696530] ffffffff814cc834 ffff880034b93468 0000001481416818 ffff88003326b874 [ 72.696530] ffff880034c7ccb0 ffff880033e48038 ffff88003326b840 ffff880034b93462 [ 72.696530] ffff88003326b88a ffff88003326b88c ffff880034b93468 ffff880035f87c70 [ 72.696530] Call Trace: [ 72.696530] [ 72.696530] [] ? udp_gro_receive+0x1c6/0x1f9 [ 72.696530] [] udp4_gro_receive+0x2b5/0x310 [ 72.696530] [] inet_gro_receive+0x4a3/0x4cd [ 72.696530] [] dev_gro_receive+0x584/0x7a3 [ 72.696530] [] ? __lock_is_held+0x29/0x64 [ 72.696530] [] napi_gro_receive+0x124/0x21d [ 72.696530] [] virtnet_receive+0x8df/0x8f6 [virtio_net] [ 72.696530] [] virtnet_poll+0x1d/0x8d [virtio_net] [ 72.696530] [] net_rx_action+0x15b/0x3b9 [ 72.696530] [] __do_softirq+0x216/0x546 [ 72.696530] [] irq_exit+0x49/0xb6 [ 72.696530] [] do_IRQ+0xe2/0xfa [ 72.696530] [] common_interrupt+0x89/0x89 [ 72.696530] [ 72.696530] [] ? trace_hardirqs_on_caller+0x229/0x270 [ 72.696530] [] ? default_idle+0x1c/0x2d [ 72.696530] [] ? default_idle+0x1a/0x2d [ 72.696530] [] arch_cpu_idle+0xa/0xc [ 72.696530] [] default_idle_call+0x1a/0x1c [ 72.696530] [] cpu_startup_entry+0x15b/0x20f [ 72.696530] [] start_secondary+0x12c/0x133 [ 72.696530] Code: ff ff ff ff ff ff ff ff ff ff 7f ff ff ff ff ff ff ff 7f 00 7e f8 33 00 88 ff ff 6d 61 58 81 ff ff ff ff 5e de 0a 81 ff ff ff ff <00> 5c e2 34 00 88 ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 72.696530] RIP [] 0xffff880033f87d78 [ 72.696530] RSP [ 72.696530] CR2: ffff880033f87d78 [ 72.696530] ---[ end trace ad7758b9a1dccf99 ]--- [ 72.696530] Kernel panic - not syncing: Fatal exception in interrupt [ 72.696530] Kernel Offset: disabled [ 72.696530] ---[ end Kernel panic - not syncing: Fatal exception in interrupt v2: use empty initialiser instead of "{ NULL }" to avoid relying on first field's type. Fixes: 38fd2af24fcf ("udp: Add socket based GRO and config") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6edfa9980314..1e40dacaa137 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; From 00bc0ef5880dc7b82f9c320dead4afaad48e47be Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 8 Jun 2016 15:13:34 +0200 Subject: [PATCH 59/85] ipv6: Skip XFRM lookup if dst_entry in socket cache is valid At present we perform an xfrm_lookup() for each UDPv6 message we send. The lookup involves querying the flow cache (flow_cache_lookup) and, in case of a cache miss, creating an XFRM bundle. If we miss the flow cache, we can end up creating a new bundle and deriving the path MTU (xfrm_init_pmtu) from on an already transformed dst_entry, which we pass from the socket cache (sk->sk_dst_cache) down to xfrm_lookup(). This can happen only if we're caching the dst_entry in the socket, that is when we're using a connected UDP socket. To put it another way, the path MTU shrinks each time we miss the flow cache, which later on leads to incorrectly fragmented payload. It can be observed with ESPv6 in transport mode: 1) Set up a transformation and lower the MTU to trigger fragmentation # ip xfrm policy add dir out src ::1 dst ::1 \ tmpl src ::1 dst ::1 proto esp spi 1 # ip xfrm state add src ::1 dst ::1 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b # ip link set dev lo mtu 1500 2) Monitor the packet flow and set up an UDP sink # tcpdump -ni lo -ttt & # socat udp6-listen:12345,fork /dev/null & 3) Send a datagram that needs fragmentation with a connected socket # perl -e 'print "@" x 1470 | socat - udp6:[::1]:12345 2016/06/07 18:52:52 socat[724] E read(3, 0x555bb3d5ba00, 8192): Protocol error 00:00:00.000000 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x2), length 1448 00:00:00.000014 IP6 ::1 > ::1: frag (1448|32) 00:00:00.000050 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x3), length 1272 (^ ICMPv6 Parameter Problem) 00:00:00.000022 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x5), length 136 4) Compare it to a non-connected socket # perl -e 'print "@" x 1500' | socat - udp6-sendto:[::1]:12345 00:00:40.535488 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x6), length 1448 00:00:00.000010 IP6 ::1 > ::1: frag (1448|64) What happens in step (3) is: 1) when connecting the socket in __ip6_datagram_connect(), we perform an XFRM lookup, miss the flow cache, create an XFRM bundle, and cache the destination, 2) afterwards, when sending the datagram, we perform an XFRM lookup, again, miss the flow cache (due to mismatch of flowi6_iif and flowi6_oif, which is an issue of its own), and recreate an XFRM bundle based on the cached (and already transformed) destination. To prevent the recreation of an XFRM bundle, avoid an XFRM lookup altogether whenever we already have a destination entry cached in the socket. This prevents the path MTU shrinkage and brings us on par with UDPv4. The fix also benefits connected PINGv6 sockets, another user of ip6_sk_dst_lookup_flow(), who also suffer messages being transformed twice. Joint work with Hannes Frederic Sowa. Reported-by: Jan Tluka Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cbf127ae7c67..635b8d340cdb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1071,17 +1071,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); From e0d194adfa9f5f473068cc546bee60fb84ab77ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 06:19:45 -0700 Subject: [PATCH 60/85] net_sched: add missing paddattr description "make htmldocs" complains otherwise: .//net/core/gen_stats.c:65: warning: No description found for parameter 'padattr' .//net/core/gen_stats.c:101: warning: No description found for parameter 'padattr' Fixes: 9854518ea04d ("sched: align nlattr properly when needed") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/gen_stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index f96ee8b9478d..be873e4e3125 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -47,6 +47,7 @@ nla_put_failure: * @xstats_type: TLV type for backward compatibility xstats TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all @@ -87,6 +88,7 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat); * @type: TLV type for top level statistic TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all From 3497ed8c852a5a3d48957ca91baaa443d9bfcd4d Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Mon, 6 Jun 2016 17:29:30 +0100 Subject: [PATCH 61/85] sfc: report supported link speeds on SFP connections 7000-series SFC NICs connected with an SFP+ module currently fail to report any supported link speeds. Reported-by: Jarod Wilson Signed-off-by: Bert Kenward Reviewed-by: Jarod Wilson Tested-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index 7f295c4d7b80..2a9228a6e4a0 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -189,11 +189,12 @@ static u32 mcdi_to_ethtool_cap(u32 media, u32 cap) case MC_CMD_MEDIA_XFP: case MC_CMD_MEDIA_SFP_PLUS: - result |= SUPPORTED_FIBRE; - break; - case MC_CMD_MEDIA_QSFP_PLUS: result |= SUPPORTED_FIBRE; + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + result |= SUPPORTED_1000baseT_Full; + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + result |= SUPPORTED_10000baseT_Full; if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) result |= SUPPORTED_40000baseCR4_Full; break; From 6eef3801e719e4ea9c15c01b1d77706f47331166 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:03 +0100 Subject: [PATCH 62/85] net: cls_u32: catch all hardware offload errors Errors reported by u32_replace_hw_hnode() were not propagated. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 27b99fd774d7..54ab32a8ff4c 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -922,11 +922,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + + err = u32_replace_hw_hnode(tp, ht, flags); + if (err) { + kfree(ht); + return err; + } + RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; - u32_replace_hw_hnode(tp, ht, flags); return 0; } From 201c44bd8ffa899f07b7b322a73e19baf0ada1e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:04 +0100 Subject: [PATCH 63/85] net: cls_u32: be more strict about skip-sw flag for knodes Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). This patch fixes the knode handling. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 54ab32a8ff4c..ffe593efe930 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -508,27 +508,28 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, tp, flags)) { - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; -#ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; -#else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; -#endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; - if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; +#ifdef CONFIG_CLS_U32_MARK + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; +#else + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; +#endif + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } From 0a46baaf634663d28038fc137239b71bf5385e5a Mon Sep 17 00:00:00 2001 From: Shweta Choudaha Date: Wed, 8 Jun 2016 20:15:43 +0100 Subject: [PATCH 64/85] ip6gre: Allow live link address change The ip6 GRE tap device should not be forced to down state to change the mac address and should allow live address change for tap device similar to ipv4 gre. Signed-off-by: Shweta Choudaha Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f4ac2842d4d9..fdc9de276ab1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1256,6 +1256,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (ret) return ret; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); @@ -1289,6 +1291,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], From 9b15350f0d5c401c02eca15c4e6ca0603cff1a41 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Jun 2016 23:23:01 +0200 Subject: [PATCH 65/85] qfq: don't leak skb if kzalloc fails When we need to create a new aggregate to enqueue the skb we call kzalloc. If that fails we returned ENOBUFS without freeing the skb. Spotted during code review. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d2d8d953432..f18857febdad 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1235,8 +1235,10 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, qdisc_pkt_len(skb)); - if (err) - return err; + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch); + } } err = qdisc_enqueue(skb, cl->qdisc); From 6cbf6236d54c24b9a29e6892549c25b6902b44ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jun 2016 09:40:55 +0200 Subject: [PATCH 66/85] cfg80211: remove get/set antenna and tx power warnings Since set_tx_power and set_antenna are frequently implemented without the matching get_tx_power/get_antenna, we shouldn't have added warnings for those. Remove them. The remaining ones are correct and need to be implemented symmetrically for correct operation. Cc: stable@vger.kernel.org Fixes: de3bb771f471 ("cfg80211: add more warnings for inconsistent ops") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index d25c82bc1bbe..ecca3896b9f7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -363,8 +363,6 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel); WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch); WARN_ON(ops->add_tx_ts && !ops->del_tx_ts); - WARN_ON(ops->set_tx_power && !ops->get_tx_power); - WARN_ON(ops->set_antenna && !ops->get_antenna); alloc_size = sizeof(*rdev) + sizeof_priv; From 3d5fdff46c4b2b9534fa2f9fc78e90a48e0ff724 Mon Sep 17 00:00:00 2001 From: Prasun Maiti Date: Mon, 6 Jun 2016 20:04:19 +0530 Subject: [PATCH 67/85] wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel iwpriv app uses iw_point structure to send data to Kernel. The iw_point structure holds a pointer. For compatibility Kernel converts the pointer as required for WEXT IOCTLs (SIOCIWFIRST to SIOCIWLAST). Some drivers may use iw_handler_def.private_args to populate iwpriv commands instead of iw_handler_def.private. For those case, the IOCTLs from SIOCIWFIRSTPRIV to SIOCIWLASTPRIV will follow the path ndo_do_ioctl(). Accordingly when the filled up iw_point structure comes from 32 bit iwpriv to 64 bit Kernel, Kernel will not convert the pointer and sends it to driver. So, the driver may get the invalid data. The pointer conversion for the IOCTLs (SIOCIWFIRSTPRIV to SIOCIWLASTPRIV), which follow the path ndo_do_ioctl(), is mandatory. This patch adds pointer conversion from 32 bit to 64 bit and vice versa, if the ioctl comes from 32 bit iwpriv to 64 bit Kernel. Cc: stable@vger.kernel.org Signed-off-by: Prasun Maiti Signed-off-by: Ujjal Roy Tested-by: Dibyajyoti Ghosh Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6250b1cfcde5..dbb2738e356a 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -958,8 +958,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) { +#ifdef CONFIG_COMPAT + if (info->flags & IW_REQUEST_FLAG_COMPAT) { + int ret = 0; + struct iwreq iwr_lcl; + struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; + + memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); + iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); + iwr_lcl.u.data.length = iwp_compat->length; + iwr_lcl.u.data.flags = iwp_compat->flags; + + ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); + + iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); + iwp_compat->length = iwr_lcl.u.data.length; + iwp_compat->flags = iwr_lcl.u.data.flags; + + return ret; + } else +#endif + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + } return -EOPNOTSUPP; } From be94535f95313a013b844b563ef15ddd8fb43da8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jun 2016 09:51:39 +0200 Subject: [PATCH 68/85] mlxsw: spectrum: Make split flow match firmware requirements When a port is created following a split / unsplit we need to map it to the correct module and lane, enable it and then continue to initialize its various parameters such as MTU and VLAN filters. Under certain conditions, such as trying to split ports at the bottom row of the front panel by four, we get firmware errors. After evaluating this with the firmware team it was decided to alter the split / unsplit flow, so that first all the affected ports are mapped, then enabled and finally each is initialized separately. Fix the split / unsplit flow by first mapping and enabling all the affected ports. Newer firmware versions will support both flows. Fixes: 18f1e70c4137 ("mlxsw: spectrum: Introduce port splitting") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 147 +++++++++++------- 1 file changed, 95 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4a7273771028..cc62d4c64fb5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -247,13 +247,21 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); } +static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 swid) +{ + char pspa_pl[MLXSW_REG_PSPA_LEN]; + + mlxsw_reg_pspa_pack(pspa_pl, swid, local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); +} + static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char pspa_pl[MLXSW_REG_PSPA_LEN]; - mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); + return __mlxsw_sp_port_swid_set(mlxsw_sp, mlxsw_sp_port->local_port, + swid); } static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1681,8 +1689,8 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -1839,28 +1847,6 @@ err_port_active_vlans_alloc: return err; } -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) -{ - int err; - - err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, - lane); - if (err) - return err; - - err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module, - width); - if (err) - goto err_port_create; - - return 0; - -err_port_create: - mlxsw_sp_port_module_unmap(mlxsw_sp, local_port); - return err; -} - static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; @@ -1927,7 +1913,7 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); if (err) goto err_port_create; } @@ -1948,12 +1934,85 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) return local_port - offset; } +static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, + u8 module, unsigned int count) +{ + u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; + int err, i; + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module, + width, i * width); + if (err) + goto err_port_module_map; + } + + for (i = 0; i < count; i++) { + err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0); + if (err) + goto err_port_swid_set; + } + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, + module, width); + if (err) + goto err_port_create; + } + + return 0; + +err_port_create: + for (i--; i >= 0; i--) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + i = count; +err_port_swid_set: + for (i--; i >= 0; i--) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, + MLXSW_PORT_SWID_DISABLED_PORT); + i = count; +err_port_module_map: + for (i--; i >= 0; i--) + mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i); + return err; +} + +static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, + u8 base_port, unsigned int count) +{ + u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH; + int i; + + /* Split by four means we need to re-create two ports, otherwise + * only one. + */ + count = count / 2; + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, + 0); + } + + for (i = 0; i < count; i++) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0); + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, + width); + } +} + static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; u8 module, cur_width, base_port; int i; int err; @@ -2001,25 +2060,16 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count; i++) { - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width, i * width); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n"); - goto err_port_create; - } + err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n"); + goto err_port_split_create; } return 0; -err_port_create: - for (i--; i >= 0; i--) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, 0); - } +err_port_split_create: + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return err; } @@ -2061,14 +2111,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, - 0); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n"); - } + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return 0; } From d664b41e2adf5851e4d0d39f450b2f3f808b65d6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jun 2016 09:51:40 +0200 Subject: [PATCH 69/85] mlxsw: spectrum: Don't sleep during ndo_get_phys_port_name() When rtnl_fill_ifinfo() is called for a certain netdevice it queries its various parameters such as switch id and physical port name. The function might get called in an atomic context, which means the underlying driver must not sleep during the query operation. Don't query the device and sleep during ndo_get_phys_port_name(), but instead store the needed parameters in port creation time. Fixes: 2bf9a58675c5 ("mlxsw: spectrum: Add support for physical port names") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 64 ++++++------------- .../net/ethernet/mellanox/mlxsw/spectrum.h | 5 ++ 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cc62d4c64fb5..6f9e3ddff4a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -313,9 +313,9 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); } -static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width, u8 *p_lane) +static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 *p_module, + u8 *p_width, u8 *p_lane) { char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; @@ -330,16 +330,6 @@ static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width) -{ - u8 lane; - - return __mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, p_module, - p_width, &lane); -} - static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 module, u8 width, u8 lane) { @@ -957,17 +947,11 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, size_t len) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - u8 module, width, lane; + u8 module = mlxsw_sp_port->mapping.module; + u8 width = mlxsw_sp_port->mapping.width; + u8 lane = mlxsw_sp_port->mapping.lane; int err; - err = __mlxsw_sp_port_module_info_get(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - &module, &width, &lane); - if (err) { - netdev_err(dev, "Failed to retrieve module information\n"); - return err; - } - if (!mlxsw_sp_port->split) err = snprintf(name, len, "p%d", module + 1); else @@ -1690,7 +1674,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) } static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) + bool split, u8 module, u8 width, u8 lane) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -1705,6 +1689,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; mlxsw_sp_port->split = split; + mlxsw_sp_port->mapping.module = module; + mlxsw_sp_port->mapping.width = width; + mlxsw_sp_port->mapping.lane = lane; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { @@ -1895,8 +1882,8 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { + u8 module, width, lane; size_t alloc_size; - u8 module, width; int i; int err; @@ -1907,13 +1894,14 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, - &width); + &width, &lane); if (err) goto err_port_module_info_get; if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width, + lane); if (err) goto err_port_create; } @@ -1955,7 +1943,7 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, for (i = 0; i < count; i++) { err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width); + module, width, i * width); if (err) goto err_port_create; } @@ -2004,7 +1992,7 @@ static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, module = mlxsw_sp->port_to_module[local_port]; mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, - width); + width, 0); } } @@ -2024,18 +2012,14 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, return -EINVAL; } + module = mlxsw_sp_port->mapping.module; + cur_width = mlxsw_sp_port->mapping.width; + if (count != 2 && count != 4) { netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n"); return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } - if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) { netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); return -EINVAL; @@ -2077,10 +2061,9 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 module, cur_width, base_port; + u8 cur_width, base_port; unsigned int count; int i; - int err; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { @@ -2094,12 +2077,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } + cur_width = mlxsw_sp_port->mapping.width; count = cur_width == 1 ? 4 : 2; base_port = mlxsw_sp_cluster_base_port_get(local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e2c022d3e2f3..13b30eaa13d4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -229,6 +229,11 @@ struct mlxsw_sp_port { struct ieee_maxrate *maxrate; struct ieee_pfc *pfc; } dcb; + struct { + u8 module; + u8 width; + u8 lane; + } mapping; /* 802.1Q bridge VLANs */ unsigned long *active_vlans; unsigned long *untagged_vlans; From 418f8399a8bedf376ec13eb01088f04a76ebdd6f Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:28 +0300 Subject: [PATCH 70/85] net/mlx5: Fix the size of modify QP mailbox Add 16 reserved bytes at the end of mlx5_modify_qp_mbox_in to match the hardware spec definition. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 64221027bf1f..1532dcf6fc5e 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -560,6 +560,7 @@ struct mlx5_modify_qp_mbox_in { __be32 optparam; u8 rsvd0[4]; struct mlx5_qp_context ctx; + u8 rsvd2[16]; }; struct mlx5_modify_qp_mbox_out { From 9cd3411c42c5d5ba55d6e745edfe7df53c1ffa41 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:29 +0300 Subject: [PATCH 71/85] net/mlx5: Fix masking of reserved bits in XRCD number Mask the reserved bits when reading the number of newly created XRCD. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b720a274220d..b82d65802d96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -418,7 +418,7 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) if (out.hdr.status) err = mlx5_cmd_status_to_err(&out.hdr); else - *xrcdn = be32_to_cpu(out.xrcdn); + *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff; return err; } From 86d56a1a6b7352542661d8a9463758c7f285fce3 Mon Sep 17 00:00:00 2001 From: Shahar Klein Date: Fri, 10 Jun 2016 00:07:30 +0300 Subject: [PATCH 72/85] net/mlx5: Fix MLX5_CMD_OP_MAX to be defined correctly Having MLX5_CMD_OP_MAX on another file causes us to repeatedly miss accounting new commands added to the driver and hence there're no entries for them in debugfs. To solve that, we integrate it into the commands enum as the last entry. Fixes: 34a40e689393 ('net/mlx5_core: Introduce modify flow table command') Signed-off-by: Shahar Klein Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 2 -- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 035abdf62cfe..51f0caf299d8 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1240,8 +1240,6 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; -#define MLX5_CMD_OP_MAX 0x920 - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7e5890..986a615f623c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -205,7 +205,8 @@ enum { MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, - MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_MAX }; struct mlx5_ifc_flow_table_fields_supported_bits { From 2fee37a47cebc26d58eec5dafc8ba787a6ee5350 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:31 +0300 Subject: [PATCH 73/85] net/mlx5: Fix root flow table update When we destroy the last flow table we need to update the root_ft to NULL. It fixes an issue for when the last flow table is destroyed and recreated again, root_ft pointer will not be updated, as a result traffic will be dropped. Fixes: 2cc43b494a6c ('net/mlx5_core: Managing root flow table') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8b5f0b2c0d5c..fa6fec1930f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1292,8 +1292,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) ft->id); return err; } - root->root_ft = new_root_ft; } + root->root_ft = new_root_ft; return 0; } From 876d634d19e41603aab91455f2c52a78a28372d5 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:32 +0300 Subject: [PATCH 74/85] net/mlx5: Fix flow steering NIC capabilities check Flow steering infrastructure is currently used only on link layer ethernet, therefore the driver should initialize the flow steering when the device link layer is ethernet. In addition, add missing capability check before initializing the namespace of NIC RX flow tables. Fixes: 2530236303d9 ('net/mlx5_core: Flow steering tree initialization') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +++++++++- include/linux/mlx5/device.h | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fa6fec1930f5..c1efa5517d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1767,6 +1767,9 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + cleanup_root_ns(dev); cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); @@ -1828,15 +1831,20 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) { int err = 0; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + err = mlx5_init_fc_stats(dev); if (err) return err; - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(dev); if (err) goto err; } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { err = init_fdb_root_ns(dev); if (err) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 51f0caf299d8..73a48479892d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1367,6 +1367,12 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) From bd02ef8eec0b98abe6d5efe280c87903b2eb9874 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:33 +0300 Subject: [PATCH 75/85] net/mlx5: Fix E-Switch flow steering capabilities check Add missing capabilities check for E-Switch FDB and ACLs flow tables before creating their namespace in flow steering. Fixes: efdc810ba39d ('net/mlx5: Flow steering, Add vport ACL support') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c1efa5517d17..e912a3d2505e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1846,19 +1846,21 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = init_fdb_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = init_egress_acl_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acl_root_ns(dev); - if (err) - goto err; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = init_egress_acl_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = init_ingress_acl_root_ns(dev); + if (err) + goto err; + } } return 0; From 3fe3d819d5015d56d0d7289ae16db5e612640c5b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Jun 2016 00:07:34 +0300 Subject: [PATCH 76/85] net/mlx5: E-Switch, Use the correct free() function We must use kvfree() for something that could have been allocated with vzalloc(), do that. Fixes: 5742df0f7dbe ('net/mlx5: E-Switch, Introduce VST vport ingress/egress ACLs') Fixes: 86d722ad2c3b ('net/mlx5: Use flow steering infrastructure for mlx5_en') Signed-off-by: Or Gerlitz Reported-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b84a6918a700..537479641c8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -529,7 +529,7 @@ out: } } - kfree(flow_group_in); + kvfree(flow_group_in); return err; } @@ -1097,7 +1097,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->egress.drop_grp = drop_grp; vport->egress.allowed_vlans_grp = vlan_grp; out: - kfree(flow_group_in); + kvfree(flow_group_in); if (err && !IS_ERR_OR_NULL(vlan_grp)) mlx5_destroy_flow_group(vlan_grp); if (err && !IS_ERR_OR_NULL(acl)) @@ -1259,7 +1259,7 @@ out: mlx5_destroy_flow_table(vport->ingress.acl); } - kfree(flow_group_in); + kvfree(flow_group_in); } static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, From 3f42ac6648723e906c1c10edc0c523aff29963cc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Jun 2016 00:07:35 +0300 Subject: [PATCH 77/85] net/mlx5: E-Switch, Use the correct error check on returned pointers The mlx5 flow-steering API (mlx5_create_flow_table/group/rule) never returns null pointer on error. Even if it was doing that, checking for IS_ERR_OR_NULL(p) and then returning PTR_ERR(p) would have cause bugs, since PTR_ERR(NULL) --> success, crash. To make things more robust and protect against related future bugs, convert all IS_ERR_OR_NULL checks on returned values to IS_ERR. Fixes: 5742df0f7dbe ('net/mlx5: E-Switch, Introduce VST vport ingress/egress ACLs') Fixes: 86d722ad2c3b ('net/mlx5: Use flow steering infrastructure for mlx5_en') Signed-off-by: Or Gerlitz Reported-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 537479641c8e..a350af221d15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -383,7 +383,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); - if (IS_ERR_OR_NULL(flow_rule)) { + if (IS_ERR(flow_rule)) { pr_warn( "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); @@ -457,7 +457,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); - if (IS_ERR_OR_NULL(fdb)) { + if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); goto out; @@ -474,7 +474,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); eth_broadcast_addr(dmac); g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; @@ -489,7 +489,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) eth_zero_addr(dmac); dmac[0] = 0x01; g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; @@ -506,7 +506,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; @@ -1060,7 +1060,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, return; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR_OR_NULL(acl)) { + if (IS_ERR(acl)) { err = PTR_ERR(acl); esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", vport->vport, err); @@ -1075,7 +1075,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); vlan_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(vlan_grp)) { + if (IS_ERR(vlan_grp)) { err = PTR_ERR(vlan_grp); esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", vport->vport, err); @@ -1086,7 +1086,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); drop_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(drop_grp)) { + if (IS_ERR(drop_grp)) { err = PTR_ERR(drop_grp); esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", vport->vport, err); @@ -1174,7 +1174,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, return; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR_OR_NULL(acl)) { + if (IS_ERR(acl)) { err = PTR_ERR(acl); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", vport->vport, err); @@ -1192,7 +1192,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", vport->vport, err); @@ -1207,7 +1207,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", vport->vport, err); @@ -1223,7 +1223,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", vport->vport, err); @@ -1236,7 +1236,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", vport->vport, err); @@ -1363,7 +1363,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); - if (IS_ERR_OR_NULL(vport->ingress.allow_rule)) { + if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", vport->vport, err); @@ -1380,7 +1380,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); - if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) { + if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", vport->vport, err); @@ -1439,7 +1439,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); - if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", vport->vport, err); @@ -1457,7 +1457,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); - if (IS_ERR_OR_NULL(vport->egress.drop_rule)) { + if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err); From 25fff58cb24c8880090d8b8ef7746001a135e876 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 10 Jun 2016 00:07:36 +0300 Subject: [PATCH 78/85] net/mlx5: E-Switch, Fix vport enable flow Reorder vport enable flow to mark the vport as enabled before calling the vport change handler which was modified to handle the case for when vport is not enabled. This fixes the case for when the PF netdev is open before sriov is enabled, once sriov is enabled at esw_enable_vport, esw_vport_change_handle_locked didn't read the PF context since it thought the PF vport was not enabled. When we enable the vport, arming for events is not required anymore, since it's done on the vport change handle Fixes: 586cfa7f1d58 ('net/mlx5: E-Switch, Use vport event handler for vport cleanup') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a350af221d15..cfec20cffd26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1491,14 +1491,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, /* Sync with current vport context */ vport->enabled_events = enable_events; - esw_vport_change_handle_locked(vport); - vport->enabled = true; /* only PF is trusted by default */ vport->trusted = (vport_num) ? false : true; - - arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + esw_vport_change_handle_locked(vport); esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); From 23898c763f4af6f5c80b0230b1ea788a0ce3cf73 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Fri, 10 Jun 2016 00:07:37 +0300 Subject: [PATCH 79/85] net/mlx5: E-Switch, Modify node guid on vf set MAC In RoCE, the RDMA-CM needs the node guid to establish connection between nodes. Today, the node guid exposed to mlx5 Ethernet VFs is zero, therefore RDMA-CM on the VF is broken. Whenever the administrator sets a MAC for a VF, derive the node guid from it and set it as well in the following way: MAC: e4:1d:2d:b3:f4:01 -> node_guid: e4:1d:2d:ff:fe:b3:f4:01 Fixes: 77256579c6b43 ('net/mlx5: E-Switch, Introduce Vport...') Signed-off-by: Noa Osherovich Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 23 ++++++++++- .../net/ethernet/mellanox/mlx5/core/vport.c | 38 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 9 ++++- include/linux/mlx5/vport.h | 2 + 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index cfec20cffd26..9b1855b199a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1725,11 +1725,24 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { - int err = 0; struct mlx5_vport *evport; + u64 node_guid; + int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1753,11 +1766,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport, err); + mutex_lock(&esw->state_lock); if (evport->enabled) err = esw_vport_ingress_config(esw, evport); mutex_unlock(&esw->state_lock); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b69dadcfb897..daf44cd4c566 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -508,6 +508,44 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + u8 *guid; + void *in; + int err; + + if (!vport) + return -EINVAL; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) + return -ENOTSUPP; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context, + node_guid); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 986a615f623c..e955a2859009 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -501,7 +501,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x1b]; + u8 reserved_at_5[0x19]; + u8 nic_vport_node_guid_modify[0x1]; + u8 nic_vport_port_guid_modify[0x1]; u8 reserved_at_20[0x7e0]; }; @@ -4584,7 +4586,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x19]; + u8 reserved_at_0[0x16]; + u8 node_guid[0x1]; + u8 port_guid[0x1]; + u8 reserved_at_18[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 301da4a5e6bf..6c16c198f680 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -50,6 +50,8 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, From 62e3c24ac4f0ee307c41a3652636f88a3f8d165c Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 10 Jun 2016 00:07:38 +0300 Subject: [PATCH 80/85] net/mlx5: E-Switch, always set mc_promisc for allmulti vports Set the mc_promisc flag also in the case of adding new mc address to existing allmulti vport. Fixes: a35f71f27a61 ('net/mlx5: E-Switch, Implement promiscuous rx modes vf request handling') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9b1855b199a1..aebbd6ccb9fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -651,6 +651,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, esw_fdb_set_vport_rule(esw, mac, vport_idx); + iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: if (!iter_vaddr) From 811afeaa3797bdf4eabed286100811b33005c9e0 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 10 Jun 2016 00:07:39 +0300 Subject: [PATCH 81/85] net/mlx5e: Use ndo_stop explicitly at shutdown flow The current implementation copies the flow of ndo_stop instead of calling it explicitly, Fixed it. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fd4392999eee..f5c8d5db25a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3192,10 +3192,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); - mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_close_locked(netdev); - mutex_unlock(&priv->state_lock); + mlx5e_close(netdev); } else { unregister_netdev(netdev); } From 0ca00fc1f808602137dc6d51f17747b3bb0fc34d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 10 Jun 2016 00:07:40 +0300 Subject: [PATCH 82/85] net/mlx5e: Fix blue flame quota logic Blue flame is a latency enhancement feature that allows the driver to write the packet data directly to the NIC's registers thus making the read of the packet data from host memory redundant. We maintain a quota for the blue flame which is reloaded whenever we identify that the hardware is processing send requests and processes them fast enough so by the time we post the next send request it was able to process all the pending ones. This indicates that the hardware is capable of processing more blue flame requests efficiently. The blue flame quota is decremented whenever we send using blue flame. The current code erroneously clears the budget if we did not use blue flame for the current post send operation and we fix it here. Fixes: 88a85f99e51f ('net/mlx5e: TX latency optimization to save DMA reads') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 229ab16fb8d3..b000ddc29553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -317,7 +317,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); - sq->bf_budget = bf ? sq->bf_budget - 1 : 0; + if (bf) + sq->bf_budget--; sq->stats.packets++; sq->stats.bytes += num_bytes; From ca8bdaf13abbdcbb12ff12164aa2d5b522ec524d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 8 Jun 2016 19:21:17 +0100 Subject: [PATCH 83/85] stmmac: fix parameter to dwmac4_set_umac_addr() The dwmac4_set_umac_addr() takes a struct mac_device_info as the first parameter, but is being passed a ioaddr instead from dwmac4_set_filter(). Fix the warning/bug by changing the first parameter. drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: warning: incorrect type in argument 1 (different address spaces) drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: expected struct mac_device_info *hw drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: got void [noderef] *ioaddr Note, only compile tested this as do not have any hardware with it in. Signed-off-by: Ben Dooks Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 4f7283d05588..44da877d2483 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -156,7 +156,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, struct netdev_hw_addr *ha; netdev_for_each_uc_addr(ha, dev) { - dwmac4_set_umac_addr(ioaddr, ha->addr, reg); + dwmac4_set_umac_addr(hw, ha->addr, reg); reg++; } } From 719c44d340beeecd22cbda91b00ef55585b3c1a0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Jun 2016 12:06:34 -0400 Subject: [PATCH 84/85] packet: compat support for sock_fprog Socket option PACKET_FANOUT_DATA takes a struct sock_fprog as argument if PACKET_FANOUT has mode PACKET_FANOUT_CBPF. This structure contains a pointer into user memory. If userland is 32-bit and kernel is 64-bit the two disagree about the layout of struct sock_fprog. Add compat setsockopt support to convert a 32-bit compat_sock_fprog to a 64-bit sock_fprog. This is analogous to compat_sock_fprog support for SO_REUSEPORT added in commit 1957598840f4 ("soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF"). Reported-by: Daniel Borkmann Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/compat.h | 1 + net/compat.c | 17 +++++++++++++++-- net/packet/af_packet.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/net/compat.h b/include/net/compat.h index 48103cf94e97..13de0ccaa059 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -42,6 +42,7 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *, unsigned int); asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, diff --git a/net/compat.c b/net/compat.c index 1373947efb50..1cd2ec046164 100644 --- a/net/compat.c +++ b/net/compat.c @@ -309,8 +309,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) __scm_destroy(scm); } -static int do_set_attach_filter(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) +/* allocate a 64-bit sock_fprog on the user stack for duration of syscall. */ +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval) { struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval; struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog)); @@ -323,6 +323,19 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, __get_user(ptr, &fprog32->filter) || __put_user(len, &kfprog->len) || __put_user(compat_ptr(ptr), &kfprog->filter)) + return NULL; + + return kfprog; +} +EXPORT_SYMBOL_GPL(get_compat_bpf_fprog); + +static int do_set_attach_filter(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock_fprog __user *kfprog; + + kfprog = get_compat_bpf_fprog(optval); + if (!kfprog) return -EFAULT; return sock_setsockopt(sock, level, optname, (char __user *)kfprog, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4040eb92d9c9..9bff6ef16fa7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,7 @@ #include #endif #include +#include #include "internal.h" @@ -3940,6 +3941,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } +#ifdef CONFIG_COMPAT +static int compat_packet_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct packet_sock *po = pkt_sk(sock->sk); + + if (level != SOL_PACKET) + return -ENOPROTOOPT; + + if (optname == PACKET_FANOUT_DATA && + po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { + optval = (char __user *)get_compat_bpf_fprog(optval); + if (!optval) + return -EFAULT; + optlen = sizeof(struct sock_fprog); + } + + return packet_setsockopt(sock, level, optname, optval, optlen); +} +#endif + static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { @@ -4416,6 +4438,9 @@ static const struct proto_ops packet_ops = { .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_packet_setsockopt, +#endif .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, From 50219538ffc0493a2b451a3aa0191138ef8bfe9d Mon Sep 17 00:00:00 2001 From: Shrikrishna Khare Date: Wed, 8 Jun 2016 07:40:53 -0700 Subject: [PATCH 85/85] vmxnet3: segCnt can be 1 for LRO packets The device emulation may send segCnt of 1 for LRO packets. Signed-off-by: Shrikrishna Khare Signed-off-by: Jin Heo Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index db8022ae415b..08885bc8d6db 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1369,7 +1369,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; segCnt = rcdlro->segCnt; - BUG_ON(segCnt <= 1); + WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index c4825392d64b..3d2b64e63408 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.7.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.8.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040700 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040800 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */