From 70d72b7e060e0a16b9ded9d0fbd3dff109743de3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 24 Apr 2016 01:17:14 +0200 Subject: [PATCH 01/21] netfilter: conntrack: init all_locks to avoid debug warning Else we get 'BUG: spinlock bad magic on CPU#' on resize when spin lock debugging is enabled. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index afde5f5e728a..895d11dced3c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); -static __read_mostly spinlock_t nf_conntrack_locks_all_lock; +static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) From cec5913c1515e5619907016658838af4eff78dd3 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 21 Apr 2016 00:47:08 -0700 Subject: [PATCH 02/21] netfilter: IDLETIMER: fix race condition when destroy the target Workqueue maybe still in running while we destroy the IDLETIMER target, thus cause a use after free error, add cancel_work_sync() to avoid such situation. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 29d2c31f406c..daf45da448fa 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -236,6 +236,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); + cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); From 5c08b0f5026fcc13efb947c4d1f2ca3558145f68 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 3 May 2016 12:08:43 +0300 Subject: [PATCH 03/21] iwlwifi: mvm: don't override the rate with the AMSDU len The TSO code creates A-MSDUs from a single large send. Each A-MSDU is an skb and skb->len doesn't include the number of bytes which need to be added for the headers being added (subframe header, TCP header, IP header, SNAP, padding). To be able to set the right value in the Tx command, we put the number of bytes added by those headers in driver_data in iwl_mvm_tx_tso and use this value in iwl_mvm_set_tx_cmd. The problem by setting this value in driver_data is that it overrides the ieee80211_tx_info. The bug manifested itself when we send P2P related frames in CCK since the rate in ieee80211_tx_info is zero-ed. This of course is a violation of the P2P specification. To fix this, copy the original ieee80211_tx_info to the stack and pass it to the functions which need it. Assign the number of bytes added by the headers to the driver_data inside the skb itself. Fixes: a6d5e32f247c ("iwlwifi: mvm: send large SKBs to the transport") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 83 ++++++++++++--------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 75870e68a7c3..34731e29c589 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -105,6 +105,7 @@ void iwl_mvm_set_tx_cmd(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_tx_cmd *tx_cmd, struct ieee80211_tx_info *info, u8 sta_id) { + struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *)skb->data; __le16 fc = hdr->frame_control; u32 tx_flags = le32_to_cpu(tx_cmd->tx_flags); @@ -185,7 +186,7 @@ void iwl_mvm_set_tx_cmd(struct iwl_mvm *mvm, struct sk_buff *skb, tx_cmd->tx_flags = cpu_to_le32(tx_flags); /* Total # bytes to be transmitted */ tx_cmd->len = cpu_to_le16((u16)skb->len + - (uintptr_t)info->driver_data[0]); + (uintptr_t)skb_info->driver_data[0]); tx_cmd->next_frame_len = 0; tx_cmd->life_time = cpu_to_le32(TX_CMD_LIFE_TIME_INFINITE); tx_cmd->sta_id = sta_id; @@ -327,10 +328,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, */ static struct iwl_device_cmd * iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, - int hdrlen, struct ieee80211_sta *sta, u8 sta_id) + struct ieee80211_tx_info *info, int hdrlen, + struct ieee80211_sta *sta, u8 sta_id) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb); struct iwl_device_cmd *dev_cmd; struct iwl_tx_cmd *tx_cmd; @@ -350,10 +352,10 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, iwl_mvm_set_tx_cmd_rate(mvm, tx_cmd, info, sta, hdr->frame_control); - memset(&info->status, 0, sizeof(info->status)); - memset(info->driver_data, 0, sizeof(info->driver_data)); + memset(&skb_info->status, 0, sizeof(skb_info->status)); + memset(skb_info->driver_data, 0, sizeof(skb_info->driver_data)); - info->driver_data[1] = dev_cmd; + skb_info->driver_data[1] = dev_cmd; return dev_cmd; } @@ -361,22 +363,25 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info info; struct iwl_device_cmd *dev_cmd; struct iwl_tx_cmd *tx_cmd; u8 sta_id; int hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_AMPDU)) + memcpy(&info, skb->cb, sizeof(info)); + + if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU)) return -1; - if (WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM && - (!info->control.vif || - info->hw_queue != info->control.vif->cab_queue))) + if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM && + (!info.control.vif || + info.hw_queue != info.control.vif->cab_queue))) return -1; /* This holds the amsdu headers length */ - info->driver_data[0] = (void *)(uintptr_t)0; + skb_info->driver_data[0] = (void *)(uintptr_t)0; /* * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used @@ -385,7 +390,7 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) * and hence needs to be sent on the aux queue */ if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && - info->control.vif->type == NL80211_IFTYPE_STATION) + info.control.vif->type == NL80211_IFTYPE_STATION) IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; /* @@ -398,14 +403,14 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) * AUX station. */ sta_id = mvm->aux_sta.sta_id; - if (info->control.vif) { + if (info.control.vif) { struct iwl_mvm_vif *mvmvif = - iwl_mvm_vif_from_mac80211(info->control.vif); + iwl_mvm_vif_from_mac80211(info.control.vif); - if (info->control.vif->type == NL80211_IFTYPE_P2P_DEVICE || - info->control.vif->type == NL80211_IFTYPE_AP) + if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE || + info.control.vif->type == NL80211_IFTYPE_AP) sta_id = mvmvif->bcast_sta.sta_id; - else if (info->control.vif->type == NL80211_IFTYPE_STATION && + else if (info.control.vif->type == NL80211_IFTYPE_STATION && is_multicast_ether_addr(hdr->addr1)) { u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id); @@ -414,19 +419,18 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) } } - IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, info->hw_queue); + IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, info.hw_queue); - dev_cmd = iwl_mvm_set_tx_params(mvm, skb, hdrlen, NULL, sta_id); + dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id); if (!dev_cmd) return -1; - /* From now on, we cannot access info->control */ tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload; /* Copy MAC header from skb into command buffer */ memcpy(tx_cmd->hdr, hdr, hdrlen); - if (iwl_trans_tx(mvm->trans, skb, dev_cmd, info->hw_queue)) { + if (iwl_trans_tx(mvm->trans, skb, dev_cmd, info.hw_queue)) { iwl_trans_free_tx_cmd(mvm->trans, dev_cmd); return -1; } @@ -445,11 +449,11 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) #ifdef CONFIG_INET static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_tx_info *info, struct ieee80211_sta *sta, struct sk_buff_head *mpdus_skb) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int mss = skb_shinfo(skb)->gso_size; struct sk_buff *tmp, *next; @@ -544,6 +548,8 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, /* This skb fits in one single A-MSDU */ if (num_subframes * mss >= tcp_payload_len) { + struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb); + /* * Compute the length of all the data added for the A-MSDU. * This will be used to compute the length to write in the TX @@ -552,11 +558,10 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, * already had one set of SNAP / IP / TCP headers. */ num_subframes = DIV_ROUND_UP(tcp_payload_len, mss); - info = IEEE80211_SKB_CB(skb); amsdu_add = num_subframes * sizeof(struct ethhdr) + (num_subframes - 1) * (snap_ip_tcp + pad); /* This holds the amsdu headers length */ - info->driver_data[0] = (void *)(uintptr_t)amsdu_add; + skb_info->driver_data[0] = (void *)(uintptr_t)amsdu_add; __skb_queue_tail(mpdus_skb, skb); return 0; @@ -596,11 +601,14 @@ segment: ip_hdr(tmp)->id = htons(ip_base_id + i * num_subframes); if (tcp_payload_len > mss) { + struct ieee80211_tx_info *skb_info = + IEEE80211_SKB_CB(tmp); + num_subframes = DIV_ROUND_UP(tcp_payload_len, mss); - info = IEEE80211_SKB_CB(tmp); amsdu_add = num_subframes * sizeof(struct ethhdr) + (num_subframes - 1) * (snap_ip_tcp + pad); - info->driver_data[0] = (void *)(uintptr_t)amsdu_add; + skb_info->driver_data[0] = + (void *)(uintptr_t)amsdu_add; skb_shinfo(tmp)->gso_size = mss; } else { qc = ieee80211_get_qos_ctl((void *)tmp->data); @@ -622,6 +630,7 @@ segment: } #else /* CONFIG_INET */ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_tx_info *info, struct ieee80211_sta *sta, struct sk_buff_head *mpdus_skb) { @@ -636,10 +645,10 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, * Sets the fields in the Tx cmd that are crypto related */ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_tx_info *info, struct ieee80211_sta *sta) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct iwl_mvm_sta *mvmsta; struct iwl_device_cmd *dev_cmd; struct iwl_tx_cmd *tx_cmd; @@ -660,7 +669,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, if (WARN_ON_ONCE(mvmsta->sta_id == IWL_MVM_STATION_COUNT)) return -1; - dev_cmd = iwl_mvm_set_tx_params(mvm, skb, hdrlen, sta, mvmsta->sta_id); + dev_cmd = iwl_mvm_set_tx_params(mvm, skb, info, hdrlen, + sta, mvmsta->sta_id); if (!dev_cmd) goto drop; @@ -736,7 +746,8 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_sta *sta) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info info; struct sk_buff_head mpdus_skbs; unsigned int payload_len; int ret; @@ -747,21 +758,23 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, if (WARN_ON_ONCE(mvmsta->sta_id == IWL_MVM_STATION_COUNT)) return -1; + memcpy(&info, skb->cb, sizeof(info)); + /* This holds the amsdu headers length */ - info->driver_data[0] = (void *)(uintptr_t)0; + skb_info->driver_data[0] = (void *)(uintptr_t)0; if (!skb_is_gso(skb)) - return iwl_mvm_tx_mpdu(mvm, skb, sta); + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) - tcp_hdrlen(skb) + skb->data_len; if (payload_len <= skb_shinfo(skb)->gso_size) - return iwl_mvm_tx_mpdu(mvm, skb, sta); + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); __skb_queue_head_init(&mpdus_skbs); - ret = iwl_mvm_tx_tso(mvm, skb, sta, &mpdus_skbs); + ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs); if (ret) return ret; @@ -771,7 +784,7 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, while (!skb_queue_empty(&mpdus_skbs)) { skb = __skb_dequeue(&mpdus_skbs); - ret = iwl_mvm_tx_mpdu(mvm, skb, sta); + ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta); if (ret) { __skb_queue_purge(&mpdus_skbs); return ret; From eda3fc50daa93b08774a18d51883c5a5d8d85e15 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 3 May 2016 16:39:19 -0400 Subject: [PATCH 04/21] netfilter: nfnetlink_acct: validate NFACCT_QUOTA parameter If a quota bit is set in NFACCT_FLAGS but the NFACCT_QUOTA parameter is missing then a NULL pointer dereference is triggered. CAP_NET_ADMIN is required to trigger the bug. Signed-off-by: Phil Turnbull Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_acct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 4c2b4c0c4d5f..dbd0803b1827 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -96,6 +96,8 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, return -EINVAL; if (flags & NFACCT_F_OVERQUOTA) return -EINVAL; + if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA]) + return -EINVAL; size += sizeof(u64); } From 4a91cb61bb995e5571098188092e296192309c77 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 24 Apr 2016 17:45:00 +0200 Subject: [PATCH 05/21] uapi glibc compat: fix compile errors when glibc net/if.h included before linux/if.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit glibc's net/if.h contains copies of definitions from linux/if.h and these conflict and cause build failures if both files are included by application source code. Changes in uapi headers, which fixed header file dependencies to include linux/if.h when it was needed, e.g. commit 1ffad83d, made the net/if.h and linux/if.h incompatibilities visible as build failures for userspace applications like iproute2 and xtables-addons. This patch fixes compile errors when glibc net/if.h is included before linux/if.h: ./linux/if.h:99:21: error: redeclaration of enumerator ‘IFF_NOARP’ ./linux/if.h:98:23: error: redeclaration of enumerator ‘IFF_RUNNING’ ./linux/if.h:97:26: error: redeclaration of enumerator ‘IFF_NOTRAILERS’ ./linux/if.h:96:27: error: redeclaration of enumerator ‘IFF_POINTOPOINT’ ./linux/if.h:95:24: error: redeclaration of enumerator ‘IFF_LOOPBACK’ ./linux/if.h:94:21: error: redeclaration of enumerator ‘IFF_DEBUG’ ./linux/if.h:93:25: error: redeclaration of enumerator ‘IFF_BROADCAST’ ./linux/if.h:92:19: error: redeclaration of enumerator ‘IFF_UP’ ./linux/if.h:252:8: error: redefinition of ‘struct ifconf’ ./linux/if.h:203:8: error: redefinition of ‘struct ifreq’ ./linux/if.h:169:8: error: redefinition of ‘struct ifmap’ ./linux/if.h:107:23: error: redeclaration of enumerator ‘IFF_DYNAMIC’ ./linux/if.h:106:25: error: redeclaration of enumerator ‘IFF_AUTOMEDIA’ ./linux/if.h:105:23: error: redeclaration of enumerator ‘IFF_PORTSEL’ ./linux/if.h:104:25: error: redeclaration of enumerator ‘IFF_MULTICAST’ ./linux/if.h:103:21: error: redeclaration of enumerator ‘IFF_SLAVE’ ./linux/if.h:102:22: error: redeclaration of enumerator ‘IFF_MASTER’ ./linux/if.h:101:24: error: redeclaration of enumerator ‘IFF_ALLMULTI’ ./linux/if.h:100:23: error: redeclaration of enumerator ‘IFF_PROMISC’ The cases where linux/if.h is included before net/if.h need a similar fix in the glibc side, or the order of include files can be changed userspace code as a workaround. This change was tested in x86 userspace on Debian unstable with scripts/headers_compile_test.sh: $ make headers_install && \ cd usr/include && ../../scripts/headers_compile_test.sh -l -k ... cc -Wall -c -nostdinc -I /usr/lib/gcc/i586-linux-gnu/5/include -I /usr/lib/gcc/i586-linux-gnu/5/include-fixed -I . -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH/i586-linux-gnu -o /dev/null ./linux/if.h_libc_before_kernel.h PASSED libc before kernel test: ./linux/if.h Reported-by: Jan Engelhardt Reported-by: Josh Boyer Reported-by: Stephen Hemminger Reported-by: Waldemar Brodkorb Cc: Gabriel Laskar Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 28 ++++++++++++++++++++ include/uapi/linux/libc-compat.h | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index f80277569f24..e601c8c3bdc7 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -19,14 +19,20 @@ #ifndef _LINUX_IF_H #define _LINUX_IF_H +#include /* for compatibility with glibc */ #include /* for "__kernel_caddr_t" et al */ #include /* for "struct sockaddr" et al */ #include /* for "__user" et al */ +#if __UAPI_DEF_IF_IFNAMSIZ #define IFNAMSIZ 16 +#endif /* __UAPI_DEF_IF_IFNAMSIZ */ #define IFALIASZ 256 #include +/* For glibc compatibility. An empty enum does not compile. */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \ + __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 /** * enum net_device_flags - &struct net_device flags * @@ -68,6 +74,8 @@ * @IFF_ECHO: echo sent packets. Volatile. */ enum net_device_flags { +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS IFF_UP = 1<<0, /* sysfs */ IFF_BROADCAST = 1<<1, /* volatile */ IFF_DEBUG = 1<<2, /* sysfs */ @@ -84,11 +92,17 @@ enum net_device_flags { IFF_PORTSEL = 1<<13, /* sysfs */ IFF_AUTOMEDIA = 1<<14, /* sysfs */ IFF_DYNAMIC = 1<<15, /* sysfs */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO IFF_LOWER_UP = 1<<16, /* volatile */ IFF_DORMANT = 1<<17, /* volatile */ IFF_ECHO = 1<<18, /* volatile */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ }; +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS #define IFF_UP IFF_UP #define IFF_BROADCAST IFF_BROADCAST #define IFF_DEBUG IFF_DEBUG @@ -105,9 +119,13 @@ enum net_device_flags { #define IFF_PORTSEL IFF_PORTSEL #define IFF_AUTOMEDIA IFF_AUTOMEDIA #define IFF_DYNAMIC IFF_DYNAMIC +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */ + +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define IFF_LOWER_UP IFF_LOWER_UP #define IFF_DORMANT IFF_DORMANT #define IFF_ECHO IFF_ECHO +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\ IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT) @@ -166,6 +184,8 @@ enum { * being very small might be worth keeping for clean configuration. */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFMAP struct ifmap { unsigned long mem_start; unsigned long mem_end; @@ -175,6 +195,7 @@ struct ifmap { unsigned char port; /* 3 bytes spare */ }; +#endif /* __UAPI_DEF_IF_IFMAP */ struct if_settings { unsigned int type; /* Type of physical device or protocol */ @@ -200,6 +221,8 @@ struct if_settings { * remainder may be interface specific. */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFREQ struct ifreq { #define IFHWADDRLEN 6 union @@ -223,6 +246,7 @@ struct ifreq { struct if_settings ifru_settings; } ifr_ifru; }; +#endif /* __UAPI_DEF_IF_IFREQ */ #define ifr_name ifr_ifrn.ifrn_name /* interface name */ #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ @@ -249,6 +273,8 @@ struct ifreq { * must know all networks accessible). */ +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFCONF struct ifconf { int ifc_len; /* size of buffer */ union { @@ -256,6 +282,8 @@ struct ifconf { struct ifreq __user *ifcu_req; } ifc_ifcu; }; +#endif /* __UAPI_DEF_IF_IFCONF */ + #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 7d024ceb075d..d5e38c73377c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -51,6 +51,40 @@ /* We have included glibc headers... */ #if defined(__GLIBC__) +/* Coordinate with glibc net/if.h header. */ +#if defined(_NET_IF_H) + +/* GLIBC headers included first so don't define anything + * that would already be defined. */ + +#define __UAPI_DEF_IF_IFCONF 0 +#define __UAPI_DEF_IF_IFMAP 0 +#define __UAPI_DEF_IF_IFNAMSIZ 0 +#define __UAPI_DEF_IF_IFREQ 0 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ + +#else /* _NET_IF_H */ + +/* Linux headers included first, and we must define everything + * we need. The expectation is that glibc will check the + * __UAPI_DEF_* defines and adjust appropriately. */ + +#define __UAPI_DEF_IF_IFCONF 1 +#define __UAPI_DEF_IF_IFMAP 1 +#define __UAPI_DEF_IF_IFNAMSIZ 1 +#define __UAPI_DEF_IF_IFREQ 1 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 + +#endif /* _NET_IF_H */ + /* Coordinate with glibc netinet/in.h header. */ #if defined(_NETINET_IN_H) @@ -117,6 +151,16 @@ * that we need. */ #else /* !defined(__GLIBC__) */ +/* Definitions for if.h */ +#define __UAPI_DEF_IF_IFCONF 1 +#define __UAPI_DEF_IF_IFMAP 1 +#define __UAPI_DEF_IF_IFNAMSIZ 1 +#define __UAPI_DEF_IF_IFREQ 1 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 + /* Definitions for in.h */ #define __UAPI_DEF_IN_ADDR 1 #define __UAPI_DEF_IN_IPPROTO 1 From 7fa816b92c52e2c304f2ff6401e0d51e1d229ca5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 7 May 2016 13:17:11 +0200 Subject: [PATCH 06/21] ravb: Add missing free_irq() call to ravb_close() When reopening the network device on ra7795/salvator-x, e.g. after a DHCP timeout: IP-Config: Reopening network devices... genirq: Flags mismatch irq 139. 00000000 (eth0:ch24:emac) vs. 00000000 (eth0:ch24:emac) ravb e6800000.ethernet eth0: cannot request IRQ eth0:ch24:emac IP-Config: Failed to open eth0 IP-Config: No network devices available The "mismatch" is due to requesting an IRQ that is already in use, while IRQF_PROBE_SHARED wasn't set. However, the real cause is that ravb_close() doesn't release the R-Car Gen3-specific secondary IRQ. Add the missing free_irq() call to fix this. Fixes: 22d4df8ff3a3cc72 ("ravb: Add support for r8a7795 SoC") Signed-off-by: Geert Uytterhoeven Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 9e2a0bd8f5a8..4277d0c12101 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1506,6 +1506,8 @@ static int ravb_close(struct net_device *ndev) priv->phydev = NULL; } + if (priv->chip_id == RCAR_GEN3) + free_irq(priv->emac_irq, ndev); free_irq(ndev->irq, ndev); napi_disable(&priv->napi[RAVB_NC]); From 79e48650320e6fba48369fccf13fd045315b19b8 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sun, 8 May 2016 12:10:14 -0400 Subject: [PATCH 07/21] net: fix a kernel infoleak in x25 module Stack object "dte_facilities" is allocated in x25_rx_call_request(), which is supposed to be initialized in x25_negotiate_facilities. However, 5 fields (8 bytes in total) are not initialized. This object is then copied to userland via copy_to_user, thus infoleak occurs. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/x25/x25_facilities.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 7ecd04c21360..997ff7b2509b 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memset(&theirs, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); if (len < 0) From 161de2caf68c549c266e571ffba8e2163886fb10 Mon Sep 17 00:00:00 2001 From: "xypron.glpk@gmx.de" Date: Mon, 9 May 2016 00:46:18 +0200 Subject: [PATCH 08/21] net: thunderx: avoid exposing kernel stack Reserved fields should be set to zero to avoid exposing bits from the kernel stack. Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index fa05e347262f..06b819db51b1 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -533,6 +533,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, nicvf_config_vlan_stripping(nic, nic->netdev->features); /* Enable Receive queue */ + memset(&rq_cfg, 0, sizeof(struct rq_cfg)); rq_cfg.ena = 1; rq_cfg.tcp_ena = 0; nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg); @@ -565,6 +566,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, qidx, (u64)(cq->dmem.phys_base)); /* Enable Completion queue */ + memset(&cq_cfg, 0, sizeof(struct cq_cfg)); cq_cfg.ena = 1; cq_cfg.reset = 0; cq_cfg.caching = 0; @@ -613,6 +615,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, qidx, (u64)(sq->dmem.phys_base)); /* Enable send queue & set queue size */ + memset(&sq_cfg, 0, sizeof(struct sq_cfg)); sq_cfg.ena = 1; sq_cfg.reset = 0; sq_cfg.ldwb = 0; @@ -649,6 +652,7 @@ static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, /* Enable RBDR & set queue size */ /* Buffer size should be in multiples of 128 bytes */ + memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg)); rbdr_cfg.ena = 1; rbdr_cfg.reset = 0; rbdr_cfg.ldwb = 0; From d99079e2fbd5ac38884f498ca99435d525152a88 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 9 May 2016 08:05:06 -0400 Subject: [PATCH 09/21] export tc ife uapi header Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 242cf0c6e33d..e3969bd939e4 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -10,3 +10,4 @@ header-y += tc_skbedit.h header-y += tc_vlan.h header-y += tc_bpf.h header-y += tc_connmark.h +header-y += tc_ife.h From e5df49d564fe993c68f5cff6d96972a6358b4958 Mon Sep 17 00:00:00 2001 From: Elad Kanfi Date: Mon, 9 May 2016 20:13:19 +0300 Subject: [PATCH 10/21] net: nps_enet: Tx handler synchronization Below is a description of a possible problematic sequence. CPU-A is sending a frame and CPU-B handles the interrupt that indicates the frame was sent. CPU-B reads an invalid value of tx_packet_sent. CPU-A CPU-B ----- ----- nps_enet_send_frame . . tx_skb = skb tx_packet_sent = true order HW to start tx . . HW complete tx ------> get tx complete interrupt . . if(tx_packet_sent == true) handle tx_skb end memory transaction (tx_packet_sent actually written) Furthermore there is a dependency between tx_skb and tx_packet_sent. There is no assurance that tx_skb contains a valid pointer at CPU B when it sees tx_packet_sent == true. Solution: Initialize tx_skb to NULL and use it to indicate that packet was sent, in this way tx_packet_sent can be removed. Add a write memory barrier after setting tx_skb in order to make sure that it is valid before HW is informed and IRQ is fired. Fixed sequence will be: CPU-A CPU-B ----- ----- tx_skb = skb wmb() . . order HW to start tx . . HW complete tx ------> get tx complete interrupt . . if(tx_skb != NULL) handle tx_skb tx_skb = NULL Signed-off-by: Elad Kanfi Acked-by: Noam Camus Acked-by: Gilad Ben-Yossef Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.c | 15 +++++++++------ drivers/net/ethernet/ezchip/nps_enet.h | 2 -- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 1f23845a0694..25ac2def08ce 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -145,7 +145,7 @@ static void nps_enet_tx_handler(struct net_device *ndev) u32 tx_ctrl_nt = (tx_ctrl_value & TX_CTL_NT_MASK) >> TX_CTL_NT_SHIFT; /* Check if we got TX */ - if (!priv->tx_packet_sent || tx_ctrl_ct) + if (!priv->tx_skb || tx_ctrl_ct) return; /* Ack Tx ctrl register */ @@ -160,7 +160,7 @@ static void nps_enet_tx_handler(struct net_device *ndev) } dev_kfree_skb(priv->tx_skb); - priv->tx_packet_sent = false; + priv->tx_skb = NULL; if (netif_queue_stopped(ndev)) netif_wake_queue(ndev); @@ -217,7 +217,7 @@ static irqreturn_t nps_enet_irq_handler(s32 irq, void *dev_instance) u32 tx_ctrl_ct = (tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT; u32 rx_ctrl_cr = (rx_ctrl_value & RX_CTL_CR_MASK) >> RX_CTL_CR_SHIFT; - if ((!tx_ctrl_ct && priv->tx_packet_sent) || rx_ctrl_cr) + if ((!tx_ctrl_ct && priv->tx_skb) || rx_ctrl_cr) if (likely(napi_schedule_prep(&priv->napi))) { nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0); __napi_schedule(&priv->napi); @@ -387,8 +387,6 @@ static void nps_enet_send_frame(struct net_device *ndev, /* Write the length of the Frame */ tx_ctrl_value |= length << TX_CTL_NT_SHIFT; - /* Indicate SW is done */ - priv->tx_packet_sent = true; tx_ctrl_value |= NPS_ENET_ENABLE << TX_CTL_CT_SHIFT; /* Send Frame */ nps_enet_reg_set(priv, NPS_ENET_REG_TX_CTL, tx_ctrl_value); @@ -465,7 +463,7 @@ static s32 nps_enet_open(struct net_device *ndev) s32 err; /* Reset private variables */ - priv->tx_packet_sent = false; + priv->tx_skb = NULL; priv->ge_mac_cfg_2_value = 0; priv->ge_mac_cfg_3_value = 0; @@ -534,6 +532,11 @@ static netdev_tx_t nps_enet_start_xmit(struct sk_buff *skb, priv->tx_skb = skb; + /* make sure tx_skb is actually written to the memory + * before the HW is informed and the IRQ is fired. + */ + wmb(); + nps_enet_send_frame(ndev, skb); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/ezchip/nps_enet.h b/drivers/net/ethernet/ezchip/nps_enet.h index d0cab600bce8..3939ca20cc9f 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.h +++ b/drivers/net/ethernet/ezchip/nps_enet.h @@ -165,14 +165,12 @@ * struct nps_enet_priv - Storage of ENET's private information. * @regs_base: Base address of ENET memory-mapped control registers. * @irq: For RX/TX IRQ number. - * @tx_packet_sent: SW indication if frame is being sent. * @tx_skb: socket buffer of sent frame. * @napi: Structure for NAPI. */ struct nps_enet_priv { void __iomem *regs_base; s32 irq; - bool tx_packet_sent; struct sk_buff *tx_skb; struct napi_struct napi; u32 ge_mac_cfg_2_value; From 05c00d82f4d170987ac29607e7f3c27223b52d1e Mon Sep 17 00:00:00 2001 From: Elad Kanfi Date: Mon, 9 May 2016 20:13:20 +0300 Subject: [PATCH 11/21] net: nps_enet: bug fix - handle lost tx interrupts The tx interrupt is of edge type, and in case such interrupt is triggered while it is masked it will not be handled even after tx interrupts are re-enabled in the end of NAPI poll. This will cause tx network to stop in the following scenario: * Rx is being handled, hence interrupts are masked. * Tx interrupt is triggered after checking if there is some tx to handle and before re-enabling the interrupts. In this situation only rx transaction will release tx requests. In order to handle the tx that was missed( if there was one ), a NAPI reschdule was added after enabling the interrupts. Signed-off-by: Elad Kanfi Acked-by: Noam Camus Acked-by: Gilad Ben-Yossef Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 25ac2def08ce..085f9125cf42 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -183,6 +183,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) work_done = nps_enet_rx_handler(ndev); if (work_done < budget) { u32 buf_int_enable_value = 0; + u32 tx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL); + u32 tx_ctrl_ct = + (tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT; napi_complete(napi); @@ -192,6 +195,18 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, buf_int_enable_value); + + /* in case we will get a tx interrupt while interrupts + * are masked, we will lose it since the tx is edge interrupt. + * specifically, while executing the code section above, + * between nps_enet_tx_handler and the interrupts enable, all + * tx requests will be stuck until we will get an rx interrupt. + * the two code lines below will solve this situation by + * re-adding ourselves to the poll list. + */ + + if (priv->tx_skb && !tx_ctrl_ct) + napi_reschedule(napi); } return work_done; From 10a81980fc47e64ffac26a073139813d3f697b64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 May 2016 20:55:16 -0700 Subject: [PATCH 12/21] tcp: refresh skb timestamp at retransmit time In the very unlikely case __tcp_retransmit_skb() can not use the cloning done in tcp_transmit_skb(), we need to refresh skb_mstamp before doing the copy and transmit, otherwise TCP TS val will be an exact copy of original transmit. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 441ae9da3a23..79a03b87a771 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2640,8 +2640,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(&skb->skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else { From 84a527a41f38a80353f185d05e41b021e1ff672b Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Tue, 10 May 2016 17:42:26 +0800 Subject: [PATCH 13/21] net: phylib: fix interrupts re-enablement in phy_start If phy was suspended and is starting, current driver always enable phy's interrupts, if phy works in polling, phy can raise unexpected interrupt which will not be handled, the interrupt will block system enter suspend again. So interrupts should only be re-enabled if phy works in interrupt. Signed-off-by: Shaohui Xie Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 5590b9c182c9..445fc5aef308 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -790,9 +790,11 @@ void phy_start(struct phy_device *phydev) break; case PHY_HALTED: /* make sure interrupts are re-enabled for the PHY */ - err = phy_enable_interrupts(phydev); - if (err < 0) - break; + if (phydev->irq != PHY_POLL) { + err = phy_enable_interrupts(phydev); + if (err < 0) + break; + } phydev->state = PHY_RESUMING; do_resume = true; From 5026c9b1bafcb309bf467b60494b3bcd6364b99c Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 May 2016 16:49:26 -0400 Subject: [PATCH 14/21] net sched: vlan action fix late binding Late vlan action binding was broken and is fixed with this patch. //add a vlan action to pop and give it an instance id of 1 sudo tc actions add action vlan pop index 1 //create filter which binds to vlan action id 1 sudo tc filter add dev $DEV parent ffff: protocol ip prio 1 u32 \ match ip dst 17.0.0.1/32 flowid 1:1 action vlan index 1 current message(before bug fix) was: RTNETLINK answers: Invalid argument We have an error talking to the kernel Signed-off-by: Jamal Hadi Salim Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index bab8ae0cefc0..c45f926dafb9 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -77,7 +77,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; - int ret = 0; + int ret = 0, exists = 0; int err; if (!nla) @@ -90,15 +90,25 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + switch (parm->v_action) { case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: - if (!tb[TCA_VLAN_PUSH_VLAN_ID]) + if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); - if (push_vid >= VLAN_VID_MASK) + if (push_vid >= VLAN_VID_MASK) { + if (exists) + tcf_hash_release(a, bind); return -ERANGE; + } if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) { push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]); @@ -114,11 +124,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } break; default: + if (exists) + tcf_hash_release(a, bind); return -EINVAL; } action = parm->v_action; - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*v), bind, false); if (ret) @@ -126,8 +138,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; From a57f19d30b2d5fb632b73729b39d05bae188eaed Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 May 2016 16:49:27 -0400 Subject: [PATCH 15/21] net sched: ipt action fix late binding This was broken and is fixed with this patch. //add an ipt action and give it an instance id of 1 sudo tc actions add action ipt -j mark --set-mark 2 index 1 //create a filter which binds to ipt action id 1 sudo tc filter add dev $DEV parent ffff: protocol ip prio 1 u32\ match ip dst 17.0.0.1/32 flowid 1:10 action ipt index 1 Message before bug fix was: RTNETLINK answers: Invalid argument We have an error talking to the kernel Signed-off-by: Jamal Hadi Salim Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 350e134cffb3..8b5270008a6e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -96,7 +96,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, struct tcf_ipt *ipt; struct xt_entry_target *td, *t; char *tname; - int ret = 0, err; + int ret = 0, err, exists = 0; u32 hook = 0; u32 index = 0; @@ -107,18 +107,23 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (err < 0) return err; - if (tb[TCA_IPT_HOOK] == NULL) - return -EINVAL; - if (tb[TCA_IPT_TARG] == NULL) + if (tb[TCA_IPT_INDEX] != NULL) + index = nla_get_u32(tb[TCA_IPT_INDEX]); + + exists = tcf_hash_check(tn, index, a, bind); + if (exists && bind) + return 0; + + if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) return -EINVAL; - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - if (!tcf_hash_check(tn, index, a, bind)) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); From 87dfbdc6c7478018c5489897d4495a27a626cda1 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 May 2016 16:49:28 -0400 Subject: [PATCH 16/21] net sched: mirred action fix late binding The process below was broken and is fixed with this patch. //add an mirred action and give it an instance id of 1 sudo tc actions add action mirred egress mirror dev $MDEV index 1 //create a filter which binds to mirred action id 1 sudo tc filter add dev $DEV parent ffff: protocol ip prio 1 u32\ match ip dst 17.0.0.1/32 flowid 1:10 action mirred index 1 Message before bug fix was: RTNETLINK answers: Invalid argument We have an error talking to the kernel Signed-off-by: Jamal Hadi Salim Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e8a760cf7775..8f3948dd38b8 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -61,7 +61,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; + int ret, ok_push = 0, exists = 0; if (nla == NULL) return -EINVAL; @@ -71,17 +71,27 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (tb[TCA_MIRRED_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: break; default: + if (exists) + tcf_hash_release(a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); - if (dev == NULL) + if (dev == NULL) { + if (exists) + tcf_hash_release(a, bind); return -ENODEV; + } switch (dev->type) { case ARPHRD_TUNNEL: case ARPHRD_TUNNEL6: @@ -99,7 +109,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev = NULL; } - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { if (dev == NULL) return -EINVAL; ret = tcf_hash_create(tn, parm->index, est, a, @@ -108,9 +118,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; ret = ACT_P_CREATED; } else { - if (bind) - return 0; - tcf_hash_release(a, bind); if (!ovr) return -EEXIST; From 0e5538ab2b59ec205411949d839de6dbab663730 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 May 2016 16:49:29 -0400 Subject: [PATCH 17/21] net sched: simple action fix late binding The process below was broken and is fixed with this patch. //add a simple action and give it an instance id of 1 sudo tc actions add action simple sdata "foobar" index 1 //create a filter which binds to simple action id 1 sudo tc filter add dev $DEV parent ffff: protocol ip prio 1 u32\ match ip dst 17.0.0.1/32 flowid 1:10 action simple index 1 Message before fix was: RTNETLINK answers: Invalid argument We have an error talking to the kernel Signed-off-by: Jamal Hadi Salim Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_simple.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 75b2be13fbcc..3a33fb648a6d 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -87,7 +87,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tc_defact *parm; struct tcf_defact *d; char *defdata; - int ret = 0, err; + int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -99,13 +99,21 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (tb[TCA_DEF_PARMS] == NULL) return -EINVAL; - if (tb[TCA_DEF_DATA] == NULL) - return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (tb[TCA_DEF_DATA] == NULL) { + if (exists) + tcf_hash_release(a, bind); + return -EINVAL; + } + defdata = nla_data(tb[TCA_DEF_DATA]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*d), bind, false); if (ret) @@ -122,8 +130,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(a); - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; From 5e1567aeb7fe0ca478bfad5d17791cce3ddd45c9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 May 2016 16:49:30 -0400 Subject: [PATCH 18/21] net sched: skbedit action fix late binding The process below was broken and is fixed with this patch. //add a skbedit action and give it an instance id of 1 sudo tc actions add action skbedit mark 10 index 1 //create a filter which binds to skbedit action id 1 sudo tc filter add dev $DEV parent ffff: protocol ip prio 1 u32\ match ip dst 17.0.0.1/32 flowid 1:10 action skbedit index 1 Message before fix was: RTNETLINK answers: Invalid argument We have an error talking to the kernel Signed-off-by: Jamal Hadi Salim Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_skbedit.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cfcdbdc00c9b..69da5a8f0034 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -69,7 +69,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL; u16 *queue_mapping = NULL; - int ret = 0, err; + int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -96,12 +96,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, mark = nla_data(tb[TCA_SKBEDIT_MARK]); } - if (!flags) - return -EINVAL; - parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!flags) { + tcf_hash_release(a, bind); + return -EINVAL; + } + + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*d), bind, false); if (ret) @@ -111,8 +117,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(a); - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; From 4e8c861550105f7aaa85a19b2571151cb8eceaa2 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 10 May 2016 16:49:31 -0400 Subject: [PATCH 19/21] net sched: ife action fix late binding The process below was broken and is fixed with this patch. //add an ife action and give it an instance id of 1 sudo tc actions add action ife encode \ type 0xDEAD allow mark dst 02:15:15:15:15:15 index 1 //create a filter which binds to ife action id 1 sudo tc filter add dev $DEV parent ffff: protocol ip prio 1 u32\ match ip dst 17.0.0.1/32 flowid 1:11 action ife index 1 Message before fix was: RTNETLINK answers: Invalid argument We have an error talking to the kernel Signed-off-by: Jamal Hadi Salim Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c589a9ba506a..343d011aa818 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -423,7 +423,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, u16 ife_type = 0; u8 *daddr = NULL; u8 *saddr = NULL; - int ret = 0; + int ret = 0, exists = 0; int err; err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy); @@ -435,25 +435,29 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + if (parm->flags & IFE_ENCODE) { /* Until we get issued the ethertype, we cant have * a default.. **/ if (!tb[TCA_IFE_TYPE]) { + if (exists) + tcf_hash_release(a, bind); pr_info("You MUST pass etherype for encoding\n"); return -EINVAL; } } - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*ife), bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - if (bind) /* dont override defaults */ - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; @@ -495,6 +499,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL); if (err) { metadata_parse_err: + if (exists) + tcf_hash_release(a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); From 16ec3d4fbb967bd0e1c8d9dce9ef70e915a86615 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 11 May 2016 10:29:26 -0700 Subject: [PATCH 20/21] openvswitch: Fix cached ct with helper. When using conntrack helpers from OVS, a common configuration is to perform a lookup without specifying a helper, then go through a firewalling policy, only to decide to attach a helper afterwards. In this case, the initial lookup will cause a ct entry to be attached to the skb, then the later commit with helper should attach the helper and confirm the connection. However, the helper attachment has been missing. If the user has enabled automatic helper attachment, then this issue will be masked as it will be applied in init_conntrack(). It is also masked if the action is executed from ovs_packet_cmd_execute() as that will construct a fresh skb. This patch fixes the issue by making an explicit call to try to assign the helper if there is a discrepancy between the action's helper and the current skb->nfct. Fixes: cae3a2627520 ("openvswitch: Allow attaching helpers to ct action") Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b5fea1101faa..10c84d882881 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -776,6 +776,19 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, return -EINVAL; } + /* Userspace may decide to perform a ct lookup without a helper + * specified followed by a (recirculate and) commit with one. + * Therefore, for unconfirmed connections which we will commit, + * we need to attach the helper here. + */ + if (!nf_ct_is_confirmed(ct) && info->commit && + info->helper && !nfct_help(ct)) { + int err = __nf_ct_try_assign_helper(ct, info->ct, + GFP_ATOMIC); + if (err) + return err; + } + /* Call the helper only if: * - nf_conntrack_in() was executed above ("!cached") for a * confirmed connection, or From e271c7b4420ddbb9fae82a2b31a5ab3edafcf4fe Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 11 May 2016 15:53:57 +0200 Subject: [PATCH 21/21] gre: do not keep the GRE header around in collect medata mode For ipgre interface in collect metadata mode, it doesn't make sense for the interface to be of ARPHRD_IPGRE type. The outer header of received packets is not needed, as all the information from it is present in metadata_dst. We already don't set ipgre_header_ops for collect metadata interfaces, which is the only consumer of mac_header pointing to the outer IP header. Just set the interface type to ARPHRD_NONE in collect metadata mode for ipgre (not gretap, that still correctly stays ARPHRD_ETHER) and reset mac_header. Fixes: a64b04d86d14 ("gre: do not assign header_ops in collect metadata mode") Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 205a2b8a5a84..4cc84212cce1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -398,7 +398,10 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { - skb_pop_mac_header(skb); + if (tunnel->dev->type != ARPHRD_NONE) + skb_pop_mac_header(skb); + else + skb_reset_mac_header(skb); if (tunnel->collect_md) { __be16 flags; __be64 tun_id; @@ -1031,6 +1034,8 @@ static void ipgre_netlink_parms(struct net_device *dev, struct ip_tunnel *t = netdev_priv(dev); t->collect_md = true; + if (dev->type == ARPHRD_IPGRE) + dev->type = ARPHRD_NONE; } }