1
0
Fork 0
alistair23-linux/net/bridge/br_netfilter_hooks.c

1199 lines
31 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Handle firewalling
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
* Bart De Schuymer <bdschuym@pandora.be>
*
* Lennert dedicates this file to Kerstin Wurdinger.
*/
#include <linux/module.h>
#include <linux/kernel.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 02:04:11 -06:00
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/netfilter_bridge.h>
#include <uapi/linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
#include <linux/rculist.h>
#include <linux/inetdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>
#include <linux/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
netns: make struct pernet_operations::id unsigned int Make struct pernet_operations::id unsigned. There are 2 reasons to do so: 1) This field is really an index into an zero based array and thus is unsigned entity. Using negative value is out-of-bound access by definition. 2) On x86_64 unsigned 32-bit data which are mixed with pointers via array indexing or offsets added or subtracted to pointers are preffered to signed 32-bit data. "int" being used as an array index needs to be sign-extended to 64-bit before being used. void f(long *p, int i) { g(p[i]); } roughly translates to movsx rsi, esi mov rdi, [rsi+...] call g MOVSX is 3 byte instruction which isn't necessary if the variable is unsigned because x86_64 is zero extending by default. Now, there is net_generic() function which, you guessed it right, uses "int" as an array index: static inline void *net_generic(const struct net *net, int id) { ... ptr = ng->ptr[id - 1]; ... } And this function is used a lot, so those sign extensions add up. Patch snipes ~1730 bytes on allyesconfig kernel (without all junk messing with code generation): add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) Unfortunately some functions actually grow bigger. This is a semmingly random artefact of code generation with register allocator being used differently. gcc decides that some variable needs to live in new r8+ registers and every access now requires REX prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be used which is longer than [r8] However, overall balance is in negative direction: add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) function old new delta nfsd4_lock 3886 3959 +73 tipc_link_build_proto_msg 1096 1140 +44 mac80211_hwsim_new_radio 2776 2808 +32 tipc_mon_rcv 1032 1058 +26 svcauth_gss_legacy_init 1413 1429 +16 tipc_bcbase_select_primary 379 392 +13 nfsd4_exchange_id 1247 1260 +13 nfsd4_setclientid_confirm 782 793 +11 ... put_client_renew_locked 494 480 -14 ip_set_sockfn_get 730 716 -14 geneve_sock_add 829 813 -16 nfsd4_sequence_done 721 703 -18 nlmclnt_lookup_host 708 686 -22 nfsd4_lockt 1085 1063 -22 nfs_get_client 1077 1050 -27 tcf_bpf_init 1106 1076 -30 nfsd4_encode_fattr 5997 5930 -67 Total: Before=154856051, After=154854321, chg -0.00% Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-16 18:58:21 -07:00
static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
bool enabled;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_hdr;
#endif
/* default value is 1 */
int call_iptables;
int call_ip6tables;
int call_arptables;
/* default value is 0 */
int filter_vlan_tagged;
int filter_pppoe_tagged;
int pass_vlan_indev;
};
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
#define IS_IP(skb) \
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP))
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
#define IS_IPV6(skb) \
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6))
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
#define IS_ARP(skb) \
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP))
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
if (skb_vlan_tag_present(skb))
return skb->protocol;
else if (skb->protocol == htons(ETH_P_8021Q))
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
else
return 0;
}
static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net)
{
struct brnf_net *brnet = net_generic(net, brnf_net_id);
return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged;
}
static inline bool is_vlan_ipv6(const struct sk_buff *skb,
const struct net *net)
{
struct brnf_net *brnet = net_generic(net, brnf_net_id);
return vlan_proto(skb) == htons(ETH_P_IPV6) &&
brnet->filter_vlan_tagged;
}
static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net)
{
struct brnf_net *brnet = net_generic(net, brnf_net_id);
return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged;
}
static inline __be16 pppoe_proto(const struct sk_buff *skb)
{
return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
sizeof(struct pppoe_hdr)));
}
static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net)
{
struct brnf_net *brnet = net_generic(net, brnf_net_id);
return skb->protocol == htons(ETH_P_PPP_SES) &&
pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged;
}
static inline bool is_pppoe_ipv6(const struct sk_buff *skb,
const struct net *net)
{
struct brnf_net *brnet = net_generic(net, brnf_net_id);
return skb->protocol == htons(ETH_P_PPP_SES) &&
pppoe_proto(skb) == htons(PPP_IPV6) &&
brnet->filter_pppoe_tagged;
}
/* largest possible L2 header, see br_nf_dev_queue_xmit() */
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
struct brnf_frag_data {
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
u8 size;
u16 vlan_tci;
__be16 vlan_proto;
};
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
static void nf_bridge_info_free(struct sk_buff *skb)
{
skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
}
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
struct net_bridge_port *port;
port = br_port_get_rcu(dev);
return port ? port->br->dev : NULL;
}
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
return skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
}
unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
switch (skb->protocol) {
case __cpu_to_be16(ETH_P_8021Q):
return VLAN_HLEN;
case __cpu_to_be16(ETH_P_PPP_SES):
return PPPOE_SES_HLEN;
default:
return 0;
}
}
static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
{
unsigned int len = nf_bridge_encap_header_len(skb);
skb_pull(skb, len);
skb->network_header += len;
}
static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
{
unsigned int len = nf_bridge_encap_header_len(skb);
skb_pull_rcsum(skb, len);
skb->network_header += len;
}
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
*/
static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
{
const struct iphdr *iph;
u32 len;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
iph = ip_hdr(skb);
/* Basic sanity checks */
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto csum_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
if (pskb_trim_rcsum(skb, len)) {
__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
/* We should really parse IP options here but until
* somebody who actually uses IP options complains to
* us we'll just silently ignore the options because
* we're lazy!
*/
return 0;
csum_error:
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
return -1;
}
void nf_bridge_update_protocol(struct sk_buff *skb)
{
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
switch (nf_bridge->orig_proto) {
case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
break;
case BRNF_PROTO_PPPOE:
skb->protocol = htons(ETH_P_PPP_SES);
break;
case BRNF_PROTO_UNCHANGED:
break;
}
}
/* Obtain the correct destination MAC address, while preserving the original
* source MAC address. If we already know this address, we just copy it. If we
* don't, we use the neighbour framework to find out. In both cases, we make
* sure that br_handle_frame_finish() is called afterwards.
*/
int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct neighbour *neigh;
struct dst_entry *dst;
skb->dev = bridge_parent(skb->dev);
if (!skb->dev)
goto free_skb;
dst = skb_dst(skb);
neigh = dst_neigh_lookup_skb(dst, skb);
if (neigh) {
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
ret = br_handle_frame_finish(net, sk, skb);
} else {
/* the neighbour function below overwrites the complete
* MAC header, so we save the Ethernet source address and
* protocol number.
*/
skb_copy_from_linear_data_offset(skb,
-(ETH_HLEN-ETH_ALEN),
nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
ret = neigh->output(neigh, skb);
}
neigh_release(neigh);
return ret;
}
free_skb:
kfree_skb(skb);
return 0;
}
static inline bool
br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
const struct nf_bridge_info *nf_bridge)
{
return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
}
/* This requires some explaining. If DNAT has taken place,
* we will need to fix up the destination Ethernet address.
* This is also true when SNAT takes place (for the reply direction).
*
* There are two cases to consider:
* 1. The packet was DNAT'ed to a device in the same bridge
* port group as it was received on. We can still bridge
* the packet.
* 2. The packet was DNAT'ed to a different device, either
* a non-bridged device or another bridge port group.
* The packet will need to be routed.
*
* The correct way of distinguishing between these two cases is to
* call ip_route_input() and to look at skb->dst->dev, which is
* changed to the destination device if ip_route_input() succeeds.
*
* Let's first consider the case that ip_route_input() succeeds:
*
* If the output device equals the logical bridge device the packet
* came in on, we can consider this bridging. The corresponding MAC
* address will be obtained in br_nf_pre_routing_finish_bridge.
* Otherwise, the packet is considered to be routed and we just
* change the destination MAC address so that the packet will
* later be passed up to the IP stack to be routed. For a redirected
* packet, ip_route_input() will give back the localhost as output device,
* which differs from the bridge device.
*
* Let's now consider the case that ip_route_input() fails:
*
* This can be because the destination address is martian, in which case
* the packet will be dropped.
* If IP forwarding is disabled, ip_route_input() will fail, while
* ip_route_output_key() can return success. The source
* address for ip_route_output_key() is set to zero, so ip_route_output_key()
* thinks we're handling a locally generated packet and won't care
* if IP forwarding is enabled. If the output device equals the logical bridge
* device, we proceed as if ip_route_input() succeeded. If it differs from the
* logical bridge port or if ip_route_output_key() fails we drop the packet.
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
/* If err equals -EHOSTUNREACH the error is due to a
* martian destination or due to the fact that
* forwarding is disabled. For most martian packets,
* ip_route_output_key() will fail. It won't fail for 2 types of
* martian destinations: loopback destinations and destination
* 0.0.0.0. In both cases the packet will be dropped because the
* destination is the loopback device and not the bridge. */
if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
RT_TOS(iph->tos), 0);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
if (rt->dst.dev == dev) {
skb_dst_set(skb, &rt->dst);
goto bridged_dnat;
}
ip_rt_put(rt);
}
free_skb:
kfree_skb(skb);
return 0;
} else {
if (skb_dst(skb)->dev == dev) {
bridged_dnat:
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
net, sk, skb, skb->dev,
NULL,
br_nf_pre_routing_finish_bridge);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
skb->pkt_type = PACKET_HOST;
}
} else {
rt = bridge_parent_rtable(nf_bridge->physindev);
if (!rt) {
kfree_skb(skb);
return 0;
}
skb_dst_set_noref(skb, &rt->dst);
}
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
br_handle_frame_finish);
return 0;
}
static struct net_device *brnf_get_logical_dev(struct sk_buff *skb,
const struct net_device *dev,
const struct net *net)
{
struct net_device *vlan, *br;
struct brnf_net *brnet = net_generic(net, brnf_net_id);
br = bridge_parent(dev);
if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
return br;
vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
skb_vlan_tag_get(skb) & VLAN_VID_MASK);
return vlan ? vlan : br;
}
/* Some common code for IPv4/IPv6 */
struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
}
nf_bridge->in_prerouting = 1;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev, net);
if (skb->protocol == htons(ETH_P_8021Q))
nf_bridge->orig_proto = BRNF_PROTO_8021Q;
else if (skb->protocol == htons(ETH_P_PPP_SES))
nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
return skb->dev;
}
/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
* Replicate the checks that IPv4 does on packet reception.
* Set skb->dev to the bridge device (i.e. parent of the
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
static unsigned int br_nf_pre_routing(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge;
struct net_bridge_port *p;
struct net_bridge *br;
__u32 len = nf_bridge_encap_header_len(skb);
struct brnf_net *brnet;
if (unlikely(!pskb_may_pull(skb, len)))
return NF_DROP;
p = br_port_get_rcu(state->in);
if (p == NULL)
return NF_DROP;
br = p->br;
brnet = net_generic(state->net, brnf_net_id);
if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
is_pppoe_ipv6(skb, state->net)) {
if (!brnet->call_ip6tables &&
!br_opt_get(br, BROPT_NF_CALL_IP6TABLES))
return NF_ACCEPT;
if (!ipv6_mod_enabled()) {
pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
return NF_DROP;
}
nf_bridge_pull_encap_header_rcsum(skb);
return br_nf_pre_routing_ipv6(priv, skb, state);
}
if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES))
return NF_ACCEPT;
if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) &&
!is_pppoe_ip(skb, state->net))
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
if (!nf_bridge_alloc(skb))
return NF_DROP;
if (!setup_pre_routing(skb, state->net))
return NF_DROP;
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IP);
skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4;
2015-09-15 19:04:16 -06:00
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish);
return NF_STOLEN;
}
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *in;
if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) {
if (skb->protocol == htons(ETH_P_IP))
nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (skb->protocol == htons(ETH_P_IPV6))
nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
in = nf_bridge->physindev;
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
netfilter: bridge-netfilter: fix crash in br_nf_forward_finish() [ 4593.956206] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [ 4593.956219] IP: [<ffffffffa03357a4>] br_nf_forward_finish+0x154/0x170 [bridge] [ 4593.956232] PGD 195ece067 PUD 1ba005067 PMD 0 [ 4593.956241] Oops: 0000 [#1] SMP [ 4593.956248] last sysfs file: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:08/ATK0110:00/hwmon/hwmon0/temp2_label [ 4593.956253] CPU 3 ... [ 4593.956380] Pid: 29512, comm: kvm Not tainted 2.6.34-rc7-net #195 P6T DELUXE/System Product Name [ 4593.956384] RIP: 0010:[<ffffffffa03357a4>] [<ffffffffa03357a4>] br_nf_forward_finish+0x154/0x170 [bridge] [ 4593.956395] RSP: 0018:ffff880001e63b78 EFLAGS: 00010246 [ 4593.956399] RAX: 0000000000000608 RBX: ffff880057181700 RCX: ffff8801b813d000 [ 4593.956402] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880057181700 [ 4593.956406] RBP: ffff880001e63ba8 R08: ffff8801b9d97000 R09: ffffffffa0335650 [ 4593.956410] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801b813d000 [ 4593.956413] R13: ffffffff81ab3940 R14: ffff880057181700 R15: 0000000000000002 [ 4593.956418] FS: 00007fc40d380710(0000) GS:ffff880001e60000(0000) knlGS:0000000000000000 [ 4593.956422] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 4593.956426] CR2: 0000000000000018 CR3: 00000001ba1d7000 CR4: 00000000000026e0 [ 4593.956429] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4593.956433] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 4593.956437] Process kvm (pid: 29512, threadinfo ffff8801ba566000, task ffff8801b8003870) [ 4593.956441] Stack: [ 4593.956443] 0000000100000020 ffff880001e63ba0 ffff880001e63ba0 ffff880057181700 [ 4593.956451] <0> ffffffffa0335650 ffffffff81ab3940 ffff880001e63bd8 ffffffffa03350e6 [ 4593.956462] <0> ffff880001e63c40 000000000000024d ffff880057181700 0000000080000000 [ 4593.956474] Call Trace: [ 4593.956478] <IRQ> [ 4593.956488] [<ffffffffa0335650>] ? br_nf_forward_finish+0x0/0x170 [bridge] [ 4593.956496] [<ffffffffa03350e6>] NF_HOOK_THRESH+0x56/0x60 [bridge] [ 4593.956504] [<ffffffffa0335282>] br_nf_forward_arp+0x112/0x120 [bridge] [ 4593.956511] [<ffffffff813f7184>] nf_iterate+0x64/0xa0 [ 4593.956519] [<ffffffffa032f920>] ? br_forward_finish+0x0/0x60 [bridge] [ 4593.956524] [<ffffffff813f722c>] nf_hook_slow+0x6c/0x100 [ 4593.956531] [<ffffffffa032f920>] ? br_forward_finish+0x0/0x60 [bridge] [ 4593.956538] [<ffffffffa032f800>] ? __br_forward+0x0/0xc0 [bridge] [ 4593.956545] [<ffffffffa032f86d>] __br_forward+0x6d/0xc0 [bridge] [ 4593.956550] [<ffffffff813c5d8e>] ? skb_clone+0x3e/0x70 [ 4593.956557] [<ffffffffa032f462>] deliver_clone+0x32/0x60 [bridge] [ 4593.956564] [<ffffffffa032f6b6>] br_flood+0xa6/0xe0 [bridge] [ 4593.956571] [<ffffffffa032f800>] ? __br_forward+0x0/0xc0 [bridge] Don't call nf_bridge_update_protocol() for ARP traffic as skb->nf_bridge isn't used in the ARP case. Reported-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: Bart De Schuymer <bdschuym@pandora.be> Signed-off-by: Patrick McHardy <kaber@trash.net>
2010-05-13 06:55:34 -06:00
nf_bridge_update_protocol(skb);
} else {
in = *((struct net_device **)(skb->cb));
}
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
br_forward_finish);
return 0;
}
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
/* This is the 'purely bridged' case. For IP, we pass the packet to
* netfilter with indev and outdev set to the bridge device,
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
static unsigned int br_nf_forward_ip(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge;
struct net_device *parent;
u_int8_t pf;
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
return NF_ACCEPT;
/* Need exclusive nf_bridge_info since we might have multiple
* different physoutdevs. */
if (!nf_bridge_unshare(skb))
return NF_DROP;
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
return NF_DROP;
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
pf = NFPROTO_IPV4;
else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
is_pppoe_ipv6(skb, state->net))
pf = NFPROTO_IPV6;
else
return NF_ACCEPT;
nf_bridge_pull_encap_header(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
}
if (pf == NFPROTO_IPV4) {
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
if (pf == NFPROTO_IPV6) {
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
nf_bridge->physoutdev = skb->dev;
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
2015-09-15 19:04:16 -06:00
NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
brnf_get_logical_dev(skb, state->in, state->net),
parent, br_nf_forward_finish);
return NF_STOLEN;
}
static unsigned int br_nf_forward_arp(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct net_bridge_port *p;
struct net_bridge *br;
struct net_device **d = (struct net_device **)(skb->cb);
struct brnf_net *brnet;
p = br_port_get_rcu(state->out);
if (p == NULL)
return NF_ACCEPT;
br = p->br;
brnet = net_generic(state->net, brnf_net_id);
if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
return NF_ACCEPT;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (!IS_ARP(skb)) {
if (!is_vlan_arp(skb, state->net))
return NF_ACCEPT;
nf_bridge_pull_encap_header(skb);
}
netfilter: bridge: make sure to pull arp header in br_nf_forward_arp() commit 5604285839aaedfb23ebe297799c6e558939334d upstream. syzbot is kind enough to remind us we need to call skb_may_pull() BUG: KMSAN: uninit-value in br_nf_forward_arp+0xe61/0x1230 net/bridge/br_netfilter_hooks.c:665 CPU: 1 PID: 11631 Comm: syz-executor.1 Not tainted 5.4.0-rc8-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0x128/0x220 mm/kmsan/kmsan_report.c:108 __msan_warning+0x64/0xc0 mm/kmsan/kmsan_instr.c:245 br_nf_forward_arp+0xe61/0x1230 net/bridge/br_netfilter_hooks.c:665 nf_hook_entry_hookfn include/linux/netfilter.h:135 [inline] nf_hook_slow+0x18b/0x3f0 net/netfilter/core.c:512 nf_hook include/linux/netfilter.h:260 [inline] NF_HOOK include/linux/netfilter.h:303 [inline] __br_forward+0x78f/0xe30 net/bridge/br_forward.c:109 br_flood+0xef0/0xfe0 net/bridge/br_forward.c:234 br_handle_frame_finish+0x1a77/0x1c20 net/bridge/br_input.c:162 nf_hook_bridge_pre net/bridge/br_input.c:245 [inline] br_handle_frame+0xfb6/0x1eb0 net/bridge/br_input.c:348 __netif_receive_skb_core+0x20b9/0x51a0 net/core/dev.c:4830 __netif_receive_skb_one_core net/core/dev.c:4927 [inline] __netif_receive_skb net/core/dev.c:5043 [inline] process_backlog+0x610/0x13c0 net/core/dev.c:5874 napi_poll net/core/dev.c:6311 [inline] net_rx_action+0x7a6/0x1aa0 net/core/dev.c:6379 __do_softirq+0x4a1/0x83a kernel/softirq.c:293 do_softirq_own_stack+0x49/0x80 arch/x86/entry/entry_64.S:1091 </IRQ> do_softirq kernel/softirq.c:338 [inline] __local_bh_enable_ip+0x184/0x1d0 kernel/softirq.c:190 local_bh_enable+0x36/0x40 include/linux/bottom_half.h:32 rcu_read_unlock_bh include/linux/rcupdate.h:688 [inline] __dev_queue_xmit+0x38e8/0x4200 net/core/dev.c:3819 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3825 packet_snd net/packet/af_packet.c:2959 [inline] packet_sendmsg+0x8234/0x9100 net/packet/af_packet.c:2984 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] __sys_sendto+0xc44/0xc70 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1960 __x64_sys_sendto+0x6e/0x90 net/socket.c:1960 do_syscall_64+0xb6/0x160 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45a679 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f0a3c9e5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 000000000045a679 RDX: 000000000000000e RSI: 0000000020000200 RDI: 0000000000000003 RBP: 000000000075bf20 R08: 00000000200000c0 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f0a3c9e66d4 R13: 00000000004c8ec1 R14: 00000000004dfe28 R15: 00000000ffffffff Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:149 [inline] kmsan_internal_poison_shadow+0x5c/0x110 mm/kmsan/kmsan.c:132 kmsan_slab_alloc+0x97/0x100 mm/kmsan/kmsan_hooks.c:86 slab_alloc_node mm/slub.c:2773 [inline] __kmalloc_node_track_caller+0xe27/0x11a0 mm/slub.c:4381 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x306/0xa10 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] alloc_skb_with_frags+0x18c/0xa80 net/core/skbuff.c:5662 sock_alloc_send_pskb+0xafd/0x10a0 net/core/sock.c:2244 packet_alloc_skb net/packet/af_packet.c:2807 [inline] packet_snd net/packet/af_packet.c:2902 [inline] packet_sendmsg+0x63a6/0x9100 net/packet/af_packet.c:2984 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] __sys_sendto+0xc44/0xc70 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1960 __x64_sys_sendto+0x6e/0x90 net/socket.c:1960 do_syscall_64+0xb6/0x160 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c4e70a87d975 ("netfilter: bridge: rename br_netfilter.c to br_netfilter_hooks.c") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Reviewed-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-12-07 15:43:39 -07:00
if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
return NF_DROP;
if (arp_hdr(skb)->ar_pln != 4) {
if (is_vlan_arp(skb, state->net))
nf_bridge_push_encap_header(skb);
return NF_ACCEPT;
}
*d = state->in;
2015-09-15 19:04:16 -06:00
NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb,
state->in, state->out, br_nf_forward_finish);
return NF_STOLEN;
}
static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
int err;
data = this_cpu_ptr(&brnf_frag_data_storage);
err = skb_cow_head(skb, data->size);
if (err) {
kfree_skb(skb);
return 0;
}
if (data->vlan_proto)
__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
static int
br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
unsigned int mtu = ip_skb_dst_mtu(sk, skb);
struct iphdr *iph = ip_hdr(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
return ip_do_fragment(net, sk, skb, output);
}
static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
return PPPOE_SES_HLEN;
return 0;
}
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
unsigned int mtu, mtu_reserved;
mtu_reserved = nf_bridge_mtu_reduction(skb);
mtu = skb->dev->mtu;
netfilter: bridge: reset skb->pkt_type after NF_INET_POST_ROUTING traversal [ Upstream commit 44f64f23bae2f0fad25503bc7ab86cd08d04cd47 ] Netfilter changes PACKET_OTHERHOST to PACKET_HOST before invoking the hooks as, while it's an expected value for a bridge, routing expects PACKET_HOST. The change is undone later on after hook traversal. This can be seen with pairs of functions updating skb>pkt_type and then reverting it to its original value: For hook NF_INET_PRE_ROUTING: setup_pre_routing / br_nf_pre_routing_finish For hook NF_INET_FORWARD: br_nf_forward_ip / br_nf_forward_finish But the third case where netfilter does this, for hook NF_INET_POST_ROUTING, the packet type is changed in br_nf_post_routing but never reverted. A comment says: /* We assume any code from br_dev_queue_push_xmit onwards doesn't care * about the value of skb->pkt_type. */ But when having a tunnel (say vxlan) attached to a bridge we have the following call trace: br_nf_pre_routing br_nf_pre_routing_ipv6 br_nf_pre_routing_finish br_nf_forward_ip br_nf_forward_finish br_nf_post_routing <- pkt_type is updated to PACKET_HOST br_nf_dev_queue_xmit <- but not reverted to its original value vxlan_xmit vxlan_xmit_one skb_tunnel_check_pmtu <- a check on pkt_type is performed In this specific case, this creates issues such as when an ICMPv6 PTB should be sent back. When CONFIG_BRIDGE_NETFILTER is enabled, the PTB isn't sent (as skb_tunnel_check_pmtu checks if pkt_type is PACKET_HOST and returns early). If the comment is right and no one cares about the value of skb->pkt_type after br_dev_queue_push_xmit (which isn't true), resetting it to its original value should be safe. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Antoine Tenart <atenart@kernel.org> Reviewed-by: Florian Westphal <fw@strlen.de> Link: https://lore.kernel.org/r/20201123174902.622102-1-atenart@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-11-23 10:49:02 -07:00
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
mtu = nf_bridge->frag_max_size;
if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
skb->protocol == htons(ETH_P_IP)) {
struct brnf_frag_data *data;
if (br_validate_ipv4(net, skb))
goto drop;
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
nf_bridge_update_protocol(skb);
data = this_cpu_ptr(&brnf_frag_data_storage);
if (skb_vlan_tag_present(skb)) {
data->vlan_tci = skb->vlan_tci;
data->vlan_proto = skb->vlan_proto;
} else {
data->vlan_proto = 0;
}
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
}
if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
skb->protocol == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
struct brnf_frag_data *data;
if (br_validate_ipv6(net, skb))
goto drop;
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
nf_bridge_update_protocol(skb);
data = this_cpu_ptr(&brnf_frag_data_storage);
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
if (v6ops)
return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
kfree_skb(skb);
return -EMSGSIZE;
}
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
drop:
kfree_skb(skb);
return 0;
}
/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *realoutdev = bridge_parent(skb->dev);
u_int8_t pf;
/* if nf_bridge is set, but ->physoutdev is NULL, this packet came in
* on a bridge, but was delivered locally and is now being routed:
*
* POST_ROUTING was already invoked from the ip stack.
*/
if (!nf_bridge || !nf_bridge->physoutdev)
return NF_ACCEPT;
if (!realoutdev)
return NF_DROP;
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
pf = NFPROTO_IPV4;
else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
is_pppoe_ipv6(skb, state->net))
pf = NFPROTO_IPV6;
else
return NF_ACCEPT;
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
}
nf_bridge_pull_encap_header(skb);
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
2015-09-15 19:04:16 -06:00
NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb,
NULL, realoutdev,
br_nf_dev_queue_xmit);
return NF_STOLEN;
}
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
static unsigned int ip_sabotage_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
return NF_ACCEPT;
}
/* This is called when br_netfilter has called into iptables/netfilter,
* and DNAT has taken place on a bridge-forwarded packet.
*
* neigh->output has created a new MAC header, with local br0 MAC
* as saddr.
*
* This restores the original MAC saddr of the bridged packet
* before invoking bridge forward logic to transmit the packet.
*/
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
nf_bridge->bridged_dnat = 0;
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
nf_bridge->neigh_header,
ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
nf_bridge->physoutdev = NULL;
br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
}
static int br_nf_dev_xmit(struct sk_buff *skb)
{
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge && nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}
return 0;
}
static const struct nf_br_ops br_ops = {
.br_dev_xmit_hook = br_nf_dev_xmit,
};
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF - 1,
},
{
.hook = br_nf_forward_arp,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_post_routing,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_LAST,
},
{
.hook = ip_sabotage_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_FIRST,
},
{
.hook = ip_sabotage_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST,
},
};
static int brnf_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct brnf_net *brnet;
struct net *net;
int ret;
if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
return NOTIFY_DONE;
ASSERT_RTNL();
net = dev_net(dev);
brnet = net_generic(net, brnf_net_id);
if (brnet->enabled)
return NOTIFY_OK;
ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
if (ret)
return NOTIFY_BAD;
brnet->enabled = true;
return NOTIFY_OK;
}
static struct notifier_block brnf_notifier __read_mostly = {
.notifier_call = brnf_device_event,
};
/* recursively invokes nf_hook_slow (again), skipping already-called
* hooks (< NF_BR_PRI_BRNF).
*
* Called with rcu read lock held.
*/
int br_nf_hook_thresh(unsigned int hook, struct net *net,
struct sock *sk, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct net *, struct sock *,
struct sk_buff *))
{
const struct nf_hook_entries *e;
struct nf_hook_state state;
struct nf_hook_ops **ops;
unsigned int i;
int ret;
e = rcu_dereference(net->nf.hooks_bridge[hook]);
if (!e)
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
for (i = 0; i < e->num_hook_entries &&
ops[i]->priority <= NF_BR_PRI_BRNF; i++)
;
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, e, i);
if (ret == 1)
ret = okfn(net, sk, skb);
return ret;
}
#ifdef CONFIG_SYSCTL
static
int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *(int *)(ctl->data))
*(int *)(ctl->data) = 1;
return ret;
}
static struct ctl_table brnf_table[] = {
{
.procname = "bridge-nf-call-arptables",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-call-iptables",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-call-ip6tables",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-filter-vlan-tagged",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-filter-pppoe-tagged",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-pass-vlan-input-dev",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{ }
};
static inline void br_netfilter_sysctl_default(struct brnf_net *brnf)
{
brnf->call_iptables = 1;
brnf->call_ip6tables = 1;
brnf->call_arptables = 1;
brnf->filter_vlan_tagged = 0;
brnf->filter_pppoe_tagged = 0;
brnf->pass_vlan_indev = 0;
}
static int br_netfilter_sysctl_init_net(struct net *net)
{
struct ctl_table *table = brnf_table;
struct brnf_net *brnet;
if (!net_eq(net, &init_net)) {
table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL);
if (!table)
return -ENOMEM;
}
brnet = net_generic(net, brnf_net_id);
table[0].data = &brnet->call_arptables;
table[1].data = &brnet->call_iptables;
table[2].data = &brnet->call_ip6tables;
table[3].data = &brnet->filter_vlan_tagged;
table[4].data = &brnet->filter_pppoe_tagged;
table[5].data = &brnet->pass_vlan_indev;
br_netfilter_sysctl_default(brnet);
brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table);
if (!brnet->ctl_hdr) {
if (!net_eq(net, &init_net))
kfree(table);
return -ENOMEM;
}
return 0;
}
static void br_netfilter_sysctl_exit_net(struct net *net,
struct brnf_net *brnet)
{
netfilter: bridge: prevent UAF in brnf_exit_net() Prevent a UAF in brnf_exit_net(). When unregister_net_sysctl_table() is called the ctl_hdr pointer will obviously be freed and so accessing it righter after is invalid. Fix this by stashing a pointer to the table we want to free before we unregister the sysctl header. Note that syzkaller falsely chased this down to the drm tree so the Fixes tag that syzkaller requested would be wrong. This commit uses a different but the correct Fixes tag. /* Splat */ BUG: KASAN: use-after-free in br_netfilter_sysctl_exit_net net/bridge/br_netfilter_hooks.c:1121 [inline] BUG: KASAN: use-after-free in brnf_exit_net+0x38c/0x3a0 net/bridge/br_netfilter_hooks.c:1141 Read of size 8 at addr ffff8880a4078d60 by task kworker/u4:4/8749 CPU: 0 PID: 8749 Comm: kworker/u4:4 Not tainted 5.2.0-rc5-next-20190618 #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0xd4/0x306 mm/kasan/report.c:351 __kasan_report.cold+0x1b/0x36 mm/kasan/report.c:482 kasan_report+0x12/0x20 mm/kasan/common.c:614 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:132 br_netfilter_sysctl_exit_net net/bridge/br_netfilter_hooks.c:1121 [inline] brnf_exit_net+0x38c/0x3a0 net/bridge/br_netfilter_hooks.c:1141 ops_exit_list.isra.0+0xaa/0x150 net/core/net_namespace.c:154 cleanup_net+0x3fb/0x960 net/core/net_namespace.c:553 process_one_work+0x989/0x1790 kernel/workqueue.c:2269 worker_thread+0x98/0xe40 kernel/workqueue.c:2415 kthread+0x354/0x420 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Allocated by task 11374: save_stack+0x23/0x90 mm/kasan/common.c:71 set_track mm/kasan/common.c:79 [inline] __kasan_kmalloc mm/kasan/common.c:489 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:462 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:503 __do_kmalloc mm/slab.c:3645 [inline] __kmalloc+0x15c/0x740 mm/slab.c:3654 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:743 [inline] __register_sysctl_table+0xc7/0xef0 fs/proc/proc_sysctl.c:1327 register_net_sysctl+0x29/0x30 net/sysctl_net.c:121 br_netfilter_sysctl_init_net net/bridge/br_netfilter_hooks.c:1105 [inline] brnf_init_net+0x379/0x6a0 net/bridge/br_netfilter_hooks.c:1126 ops_init+0xb3/0x410 net/core/net_namespace.c:130 setup_net+0x2d3/0x740 net/core/net_namespace.c:316 copy_net_ns+0x1df/0x340 net/core/net_namespace.c:439 create_new_namespaces+0x400/0x7b0 kernel/nsproxy.c:103 unshare_nsproxy_namespaces+0xc2/0x200 kernel/nsproxy.c:202 ksys_unshare+0x444/0x980 kernel/fork.c:2822 __do_sys_unshare kernel/fork.c:2890 [inline] __se_sys_unshare kernel/fork.c:2888 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:2888 do_syscall_64+0xfd/0x680 arch/x86/entry/common.c:301 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9: save_stack+0x23/0x90 mm/kasan/common.c:71 set_track mm/kasan/common.c:79 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:451 kasan_slab_free+0xe/0x10 mm/kasan/common.c:459 __cache_free mm/slab.c:3417 [inline] kfree+0x10a/0x2c0 mm/slab.c:3746 __rcu_reclaim kernel/rcu/rcu.h:215 [inline] rcu_do_batch kernel/rcu/tree.c:2092 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2310 [inline] rcu_core+0xcc7/0x1500 kernel/rcu/tree.c:2291 __do_softirq+0x25c/0x94c kernel/softirq.c:292 The buggy address belongs to the object at ffff8880a4078d40 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 32 bytes inside of 512-byte region [ffff8880a4078d40, ffff8880a4078f40) The buggy address belongs to the page: page:ffffea0002901e00 refcount:1 mapcount:0 mapping:ffff8880aa400a80 index:0xffff8880a40785c0 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea0001d636c8 ffffea0001b07308 ffff8880aa400a80 raw: ffff8880a40785c0 ffff8880a40780c0 0000000100000004 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880a4078c00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a4078c80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc > ffff8880a4078d00: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8880a4078d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a4078e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Reported-by: syzbot+43a3fa52c0d9c5c94f41@syzkaller.appspotmail.com Fixes: 22567590b2e6 ("netfilter: bridge: namespace bridge netfilter sysctls") Signed-off-by: Christian Brauner <christian@brauner.io> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2019-06-19 11:05:47 -06:00
struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(brnet->ctl_hdr);
if (!net_eq(net, &init_net))
netfilter: bridge: prevent UAF in brnf_exit_net() Prevent a UAF in brnf_exit_net(). When unregister_net_sysctl_table() is called the ctl_hdr pointer will obviously be freed and so accessing it righter after is invalid. Fix this by stashing a pointer to the table we want to free before we unregister the sysctl header. Note that syzkaller falsely chased this down to the drm tree so the Fixes tag that syzkaller requested would be wrong. This commit uses a different but the correct Fixes tag. /* Splat */ BUG: KASAN: use-after-free in br_netfilter_sysctl_exit_net net/bridge/br_netfilter_hooks.c:1121 [inline] BUG: KASAN: use-after-free in brnf_exit_net+0x38c/0x3a0 net/bridge/br_netfilter_hooks.c:1141 Read of size 8 at addr ffff8880a4078d60 by task kworker/u4:4/8749 CPU: 0 PID: 8749 Comm: kworker/u4:4 Not tainted 5.2.0-rc5-next-20190618 #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0xd4/0x306 mm/kasan/report.c:351 __kasan_report.cold+0x1b/0x36 mm/kasan/report.c:482 kasan_report+0x12/0x20 mm/kasan/common.c:614 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:132 br_netfilter_sysctl_exit_net net/bridge/br_netfilter_hooks.c:1121 [inline] brnf_exit_net+0x38c/0x3a0 net/bridge/br_netfilter_hooks.c:1141 ops_exit_list.isra.0+0xaa/0x150 net/core/net_namespace.c:154 cleanup_net+0x3fb/0x960 net/core/net_namespace.c:553 process_one_work+0x989/0x1790 kernel/workqueue.c:2269 worker_thread+0x98/0xe40 kernel/workqueue.c:2415 kthread+0x354/0x420 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Allocated by task 11374: save_stack+0x23/0x90 mm/kasan/common.c:71 set_track mm/kasan/common.c:79 [inline] __kasan_kmalloc mm/kasan/common.c:489 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:462 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:503 __do_kmalloc mm/slab.c:3645 [inline] __kmalloc+0x15c/0x740 mm/slab.c:3654 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:743 [inline] __register_sysctl_table+0xc7/0xef0 fs/proc/proc_sysctl.c:1327 register_net_sysctl+0x29/0x30 net/sysctl_net.c:121 br_netfilter_sysctl_init_net net/bridge/br_netfilter_hooks.c:1105 [inline] brnf_init_net+0x379/0x6a0 net/bridge/br_netfilter_hooks.c:1126 ops_init+0xb3/0x410 net/core/net_namespace.c:130 setup_net+0x2d3/0x740 net/core/net_namespace.c:316 copy_net_ns+0x1df/0x340 net/core/net_namespace.c:439 create_new_namespaces+0x400/0x7b0 kernel/nsproxy.c:103 unshare_nsproxy_namespaces+0xc2/0x200 kernel/nsproxy.c:202 ksys_unshare+0x444/0x980 kernel/fork.c:2822 __do_sys_unshare kernel/fork.c:2890 [inline] __se_sys_unshare kernel/fork.c:2888 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:2888 do_syscall_64+0xfd/0x680 arch/x86/entry/common.c:301 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9: save_stack+0x23/0x90 mm/kasan/common.c:71 set_track mm/kasan/common.c:79 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:451 kasan_slab_free+0xe/0x10 mm/kasan/common.c:459 __cache_free mm/slab.c:3417 [inline] kfree+0x10a/0x2c0 mm/slab.c:3746 __rcu_reclaim kernel/rcu/rcu.h:215 [inline] rcu_do_batch kernel/rcu/tree.c:2092 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2310 [inline] rcu_core+0xcc7/0x1500 kernel/rcu/tree.c:2291 __do_softirq+0x25c/0x94c kernel/softirq.c:292 The buggy address belongs to the object at ffff8880a4078d40 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 32 bytes inside of 512-byte region [ffff8880a4078d40, ffff8880a4078f40) The buggy address belongs to the page: page:ffffea0002901e00 refcount:1 mapcount:0 mapping:ffff8880aa400a80 index:0xffff8880a40785c0 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea0001d636c8 ffffea0001b07308 ffff8880aa400a80 raw: ffff8880a40785c0 ffff8880a40780c0 0000000100000004 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880a4078c00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a4078c80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc > ffff8880a4078d00: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8880a4078d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a4078e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Reported-by: syzbot+43a3fa52c0d9c5c94f41@syzkaller.appspotmail.com Fixes: 22567590b2e6 ("netfilter: bridge: namespace bridge netfilter sysctls") Signed-off-by: Christian Brauner <christian@brauner.io> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2019-06-19 11:05:47 -06:00
kfree(table);
}
static int __net_init brnf_init_net(struct net *net)
{
return br_netfilter_sysctl_init_net(net);
}
#endif
static void __net_exit brnf_exit_net(struct net *net)
{
struct brnf_net *brnet;
brnet = net_generic(net, brnf_net_id);
if (brnet->enabled) {
nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
brnet->enabled = false;
}
#ifdef CONFIG_SYSCTL
br_netfilter_sysctl_exit_net(net, brnet);
#endif
}
static struct pernet_operations brnf_net_ops __read_mostly = {
#ifdef CONFIG_SYSCTL
.init = brnf_init_net,
#endif
.exit = brnf_exit_net,
.id = &brnf_net_id,
.size = sizeof(struct brnf_net),
};
static int __init br_netfilter_init(void)
{
int ret;
ret = register_pernet_subsys(&brnf_net_ops);
if (ret < 0)
return ret;
ret = register_netdevice_notifier(&brnf_notifier);
if (ret < 0) {
unregister_pernet_subsys(&brnf_net_ops);
return ret;
}
RCU_INIT_POINTER(nf_br_ops, &br_ops);
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
}
static void __exit br_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_br_ops, NULL);
unregister_netdevice_notifier(&brnf_notifier);
unregister_pernet_subsys(&brnf_net_ops);
}
module_init(br_netfilter_init);
module_exit(br_netfilter_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");