1
0
Fork 0
alistair23-linux/net/bridge/br_netfilter_hooks.c

1117 lines
29 KiB
C
Raw Normal View History

/*
* Handle firewalling
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
* Bart De Schuymer <bdschuym@pandora.be>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Lennert dedicates this file to Kerstin Wurdinger.
*/
#include <linux/module.h>
#include <linux/kernel.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 02:04:11 -06:00
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
#include <linux/rculist.h>
#include <linux/inetdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>
#include <linux/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
netns: make struct pernet_operations::id unsigned int Make struct pernet_operations::id unsigned. There are 2 reasons to do so: 1) This field is really an index into an zero based array and thus is unsigned entity. Using negative value is out-of-bound access by definition. 2) On x86_64 unsigned 32-bit data which are mixed with pointers via array indexing or offsets added or subtracted to pointers are preffered to signed 32-bit data. "int" being used as an array index needs to be sign-extended to 64-bit before being used. void f(long *p, int i) { g(p[i]); } roughly translates to movsx rsi, esi mov rdi, [rsi+...] call g MOVSX is 3 byte instruction which isn't necessary if the variable is unsigned because x86_64 is zero extending by default. Now, there is net_generic() function which, you guessed it right, uses "int" as an array index: static inline void *net_generic(const struct net *net, int id) { ... ptr = ng->ptr[id - 1]; ... } And this function is used a lot, so those sign extensions add up. Patch snipes ~1730 bytes on allyesconfig kernel (without all junk messing with code generation): add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) Unfortunately some functions actually grow bigger. This is a semmingly random artefact of code generation with register allocator being used differently. gcc decides that some variable needs to live in new r8+ registers and every access now requires REX prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be used which is longer than [r8] However, overall balance is in negative direction: add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) function old new delta nfsd4_lock 3886 3959 +73 tipc_link_build_proto_msg 1096 1140 +44 mac80211_hwsim_new_radio 2776 2808 +32 tipc_mon_rcv 1032 1058 +26 svcauth_gss_legacy_init 1413 1429 +16 tipc_bcbase_select_primary 379 392 +13 nfsd4_exchange_id 1247 1260 +13 nfsd4_setclientid_confirm 782 793 +11 ... put_client_renew_locked 494 480 -14 ip_set_sockfn_get 730 716 -14 geneve_sock_add 829 813 -16 nfsd4_sequence_done 721 703 -18 nlmclnt_lookup_host 708 686 -22 nfsd4_lockt 1085 1063 -22 nfs_get_client 1077 1050 -27 tcf_bpf_init 1106 1076 -30 nfsd4_encode_fattr 5997 5930 -67 Total: Before=154856051, After=154854321, chg -0.00% Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-16 18:58:21 -07:00
static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
bool enabled;
};
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *brnf_sysctl_header;
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly;
static int brnf_filter_pppoe_tagged __read_mostly;
static int brnf_pass_vlan_indev __read_mostly;
#else
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
#define brnf_call_arptables 1
#define brnf_filter_vlan_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_pass_vlan_indev 0
#endif
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
#define IS_IP(skb) \
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP))
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
#define IS_IPV6(skb) \
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6))
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
#define IS_ARP(skb) \
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP))
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
if (skb_vlan_tag_present(skb))
return skb->protocol;
else if (skb->protocol == htons(ETH_P_8021Q))
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
else
return 0;
}
#define IS_VLAN_IP(skb) \
(vlan_proto(skb) == htons(ETH_P_IP) && \
brnf_filter_vlan_tagged)
#define IS_VLAN_IPV6(skb) \
(vlan_proto(skb) == htons(ETH_P_IPV6) && \
brnf_filter_vlan_tagged)
#define IS_VLAN_ARP(skb) \
(vlan_proto(skb) == htons(ETH_P_ARP) && \
brnf_filter_vlan_tagged)
static inline __be16 pppoe_proto(const struct sk_buff *skb)
{
return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
sizeof(struct pppoe_hdr)));
}
#define IS_PPPOE_IP(skb) \
(skb->protocol == htons(ETH_P_PPP_SES) && \
pppoe_proto(skb) == htons(PPP_IP) && \
brnf_filter_pppoe_tagged)
#define IS_PPPOE_IPV6(skb) \
(skb->protocol == htons(ETH_P_PPP_SES) && \
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
/* largest possible L2 header, see br_nf_dev_queue_xmit() */
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
struct brnf_frag_data {
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
u8 size;
u16 vlan_tci;
__be16 vlan_proto;
};
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
static void nf_bridge_info_free(struct sk_buff *skb)
{
if (skb->nf_bridge) {
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
}
}
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
struct net_bridge_port *port;
port = br_port_get_rcu(dev);
return port ? port->br->dev : NULL;
}
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
if (refcount_read(&nf_bridge->use) > 1) {
struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
if (tmp) {
memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
refcount_set(&tmp->use, 1);
}
nf_bridge_put(nf_bridge);
nf_bridge = tmp;
}
return nf_bridge;
}
unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
switch (skb->protocol) {
case __cpu_to_be16(ETH_P_8021Q):
return VLAN_HLEN;
case __cpu_to_be16(ETH_P_PPP_SES):
return PPPOE_SES_HLEN;
default:
return 0;
}
}
static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
{
unsigned int len = nf_bridge_encap_header_len(skb);
skb_pull(skb, len);
skb->network_header += len;
}
static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
{
unsigned int len = nf_bridge_encap_header_len(skb);
skb_pull_rcsum(skb, len);
skb->network_header += len;
}
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
*/
static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
{
const struct iphdr *iph;
u32 len;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
iph = ip_hdr(skb);
/* Basic sanity checks */
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto inhdr_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
if (pskb_trim_rcsum(skb, len)) {
__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
/* We should really parse IP options here but until
* somebody who actually uses IP options complains to
* us we'll just silently ignore the options because
* we're lazy!
*/
return 0;
inhdr_error:
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
return -1;
}
void nf_bridge_update_protocol(struct sk_buff *skb)
{
switch (skb->nf_bridge->orig_proto) {
case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
break;
case BRNF_PROTO_PPPOE:
skb->protocol = htons(ETH_P_PPP_SES);
break;
case BRNF_PROTO_UNCHANGED:
break;
}
}
/* Obtain the correct destination MAC address, while preserving the original
* source MAC address. If we already know this address, we just copy it. If we
* don't, we use the neighbour framework to find out. In both cases, we make
* sure that br_handle_frame_finish() is called afterwards.
*/
int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct neighbour *neigh;
struct dst_entry *dst;
skb->dev = bridge_parent(skb->dev);
if (!skb->dev)
goto free_skb;
dst = skb_dst(skb);
neigh = dst_neigh_lookup_skb(dst, skb);
if (neigh) {
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
if (neigh->hh.hh_len) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
ret = br_handle_frame_finish(net, sk, skb);
} else {
/* the neighbour function below overwrites the complete
* MAC header, so we save the Ethernet source address and
* protocol number.
*/
skb_copy_from_linear_data_offset(skb,
-(ETH_HLEN-ETH_ALEN),
nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
ret = neigh->output(neigh, skb);
}
neigh_release(neigh);
return ret;
}
free_skb:
kfree_skb(skb);
return 0;
}
static inline bool
br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
const struct nf_bridge_info *nf_bridge)
{
return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
}
/* This requires some explaining. If DNAT has taken place,
* we will need to fix up the destination Ethernet address.
* This is also true when SNAT takes place (for the reply direction).
*
* There are two cases to consider:
* 1. The packet was DNAT'ed to a device in the same bridge
* port group as it was received on. We can still bridge
* the packet.
* 2. The packet was DNAT'ed to a different device, either
* a non-bridged device or another bridge port group.
* The packet will need to be routed.
*
* The correct way of distinguishing between these two cases is to
* call ip_route_input() and to look at skb->dst->dev, which is
* changed to the destination device if ip_route_input() succeeds.
*
* Let's first consider the case that ip_route_input() succeeds:
*
* If the output device equals the logical bridge device the packet
* came in on, we can consider this bridging. The corresponding MAC
* address will be obtained in br_nf_pre_routing_finish_bridge.
* Otherwise, the packet is considered to be routed and we just
* change the destination MAC address so that the packet will
* later be passed up to the IP stack to be routed. For a redirected
* packet, ip_route_input() will give back the localhost as output device,
* which differs from the bridge device.
*
* Let's now consider the case that ip_route_input() fails:
*
* This can be because the destination address is martian, in which case
* the packet will be dropped.
* If IP forwarding is disabled, ip_route_input() will fail, while
* ip_route_output_key() can return success. The source
* address for ip_route_output_key() is set to zero, so ip_route_output_key()
* thinks we're handling a locally generated packet and won't care
* if IP forwarding is enabled. If the output device equals the logical bridge
* device, we proceed as if ip_route_input() succeeded. If it differs from the
* logical bridge port or if ip_route_output_key() fails we drop the packet.
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
/* If err equals -EHOSTUNREACH the error is due to a
* martian destination or due to the fact that
* forwarding is disabled. For most martian packets,
* ip_route_output_key() will fail. It won't fail for 2 types of
* martian destinations: loopback destinations and destination
* 0.0.0.0. In both cases the packet will be dropped because the
* destination is the loopback device and not the bridge. */
if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
RT_TOS(iph->tos), 0);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
if (rt->dst.dev == dev) {
skb_dst_set(skb, &rt->dst);
goto bridged_dnat;
}
ip_rt_put(rt);
}
free_skb:
kfree_skb(skb);
return 0;
} else {
if (skb_dst(skb)->dev == dev) {
bridged_dnat:
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
net, sk, skb, skb->dev,
NULL,
br_nf_pre_routing_finish_bridge);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
skb->pkt_type = PACKET_HOST;
}
} else {
rt = bridge_parent_rtable(nf_bridge->physindev);
if (!rt) {
kfree_skb(skb);
return 0;
}
skb_dst_set_noref(skb, &rt->dst);
}
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
br_handle_frame_finish);
return 0;
}
static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
{
struct net_device *vlan, *br;
br = bridge_parent(dev);
if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
return br;
vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
skb_vlan_tag_get(skb) & VLAN_VID_MASK);
return vlan ? vlan : br;
}
/* Some common code for IPv4/IPv6 */
struct net_device *setup_pre_routing(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
}
nf_bridge->in_prerouting = 1;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
if (skb->protocol == htons(ETH_P_8021Q))
nf_bridge->orig_proto = BRNF_PROTO_8021Q;
else if (skb->protocol == htons(ETH_P_PPP_SES))
nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
return skb->dev;
}
/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
* Replicate the checks that IPv4 does on packet reception.
* Set skb->dev to the bridge device (i.e. parent of the
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
static unsigned int br_nf_pre_routing(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge;
struct net_bridge_port *p;
struct net_bridge *br;
__u32 len = nf_bridge_encap_header_len(skb);
if (unlikely(!pskb_may_pull(skb, len)))
return NF_DROP;
p = br_port_get_rcu(state->in);
if (p == NULL)
return NF_DROP;
br = p->br;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
return br_nf_pre_routing_ipv6(priv, skb, state);
}
if (!brnf_call_iptables && !br->nf_call_iptables)
return NF_ACCEPT;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
if (!nf_bridge_alloc(skb))
return NF_DROP;
if (!setup_pre_routing(skb))
return NF_DROP;
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IP);
2015-09-15 19:04:16 -06:00
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish);
return NF_STOLEN;
}
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *in;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
if (skb->protocol == htons(ETH_P_IP))
nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (skb->protocol == htons(ETH_P_IPV6))
nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
in = nf_bridge->physindev;
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
netfilter: bridge-netfilter: fix crash in br_nf_forward_finish() [ 4593.956206] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [ 4593.956219] IP: [<ffffffffa03357a4>] br_nf_forward_finish+0x154/0x170 [bridge] [ 4593.956232] PGD 195ece067 PUD 1ba005067 PMD 0 [ 4593.956241] Oops: 0000 [#1] SMP [ 4593.956248] last sysfs file: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:08/ATK0110:00/hwmon/hwmon0/temp2_label [ 4593.956253] CPU 3 ... [ 4593.956380] Pid: 29512, comm: kvm Not tainted 2.6.34-rc7-net #195 P6T DELUXE/System Product Name [ 4593.956384] RIP: 0010:[<ffffffffa03357a4>] [<ffffffffa03357a4>] br_nf_forward_finish+0x154/0x170 [bridge] [ 4593.956395] RSP: 0018:ffff880001e63b78 EFLAGS: 00010246 [ 4593.956399] RAX: 0000000000000608 RBX: ffff880057181700 RCX: ffff8801b813d000 [ 4593.956402] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880057181700 [ 4593.956406] RBP: ffff880001e63ba8 R08: ffff8801b9d97000 R09: ffffffffa0335650 [ 4593.956410] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801b813d000 [ 4593.956413] R13: ffffffff81ab3940 R14: ffff880057181700 R15: 0000000000000002 [ 4593.956418] FS: 00007fc40d380710(0000) GS:ffff880001e60000(0000) knlGS:0000000000000000 [ 4593.956422] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 4593.956426] CR2: 0000000000000018 CR3: 00000001ba1d7000 CR4: 00000000000026e0 [ 4593.956429] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4593.956433] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 4593.956437] Process kvm (pid: 29512, threadinfo ffff8801ba566000, task ffff8801b8003870) [ 4593.956441] Stack: [ 4593.956443] 0000000100000020 ffff880001e63ba0 ffff880001e63ba0 ffff880057181700 [ 4593.956451] <0> ffffffffa0335650 ffffffff81ab3940 ffff880001e63bd8 ffffffffa03350e6 [ 4593.956462] <0> ffff880001e63c40 000000000000024d ffff880057181700 0000000080000000 [ 4593.956474] Call Trace: [ 4593.956478] <IRQ> [ 4593.956488] [<ffffffffa0335650>] ? br_nf_forward_finish+0x0/0x170 [bridge] [ 4593.956496] [<ffffffffa03350e6>] NF_HOOK_THRESH+0x56/0x60 [bridge] [ 4593.956504] [<ffffffffa0335282>] br_nf_forward_arp+0x112/0x120 [bridge] [ 4593.956511] [<ffffffff813f7184>] nf_iterate+0x64/0xa0 [ 4593.956519] [<ffffffffa032f920>] ? br_forward_finish+0x0/0x60 [bridge] [ 4593.956524] [<ffffffff813f722c>] nf_hook_slow+0x6c/0x100 [ 4593.956531] [<ffffffffa032f920>] ? br_forward_finish+0x0/0x60 [bridge] [ 4593.956538] [<ffffffffa032f800>] ? __br_forward+0x0/0xc0 [bridge] [ 4593.956545] [<ffffffffa032f86d>] __br_forward+0x6d/0xc0 [bridge] [ 4593.956550] [<ffffffff813c5d8e>] ? skb_clone+0x3e/0x70 [ 4593.956557] [<ffffffffa032f462>] deliver_clone+0x32/0x60 [bridge] [ 4593.956564] [<ffffffffa032f6b6>] br_flood+0xa6/0xe0 [bridge] [ 4593.956571] [<ffffffffa032f800>] ? __br_forward+0x0/0xc0 [bridge] Don't call nf_bridge_update_protocol() for ARP traffic as skb->nf_bridge isn't used in the ARP case. Reported-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: Bart De Schuymer <bdschuym@pandora.be> Signed-off-by: Patrick McHardy <kaber@trash.net>
2010-05-13 06:55:34 -06:00
nf_bridge_update_protocol(skb);
} else {
in = *((struct net_device **)(skb->cb));
}
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
br_forward_finish);
return 0;
}
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
/* This is the 'purely bridged' case. For IP, we pass the packet to
* netfilter with indev and outdev set to the bridge device,
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
static unsigned int br_nf_forward_ip(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge;
struct net_device *parent;
u_int8_t pf;
if (!skb->nf_bridge)
return NF_ACCEPT;
/* Need exclusive nf_bridge_info since we might have multiple
* different physoutdevs. */
if (!nf_bridge_unshare(skb))
return NF_DROP;
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
return NF_DROP;
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
pf = NFPROTO_IPV4;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
pf = NFPROTO_IPV6;
else
return NF_ACCEPT;
nf_bridge_pull_encap_header(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
}
if (pf == NFPROTO_IPV4) {
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
if (pf == NFPROTO_IPV6) {
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
nf_bridge->physoutdev = skb->dev;
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
2015-09-15 19:04:16 -06:00
NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
brnf_get_logical_dev(skb, state->in),
parent, br_nf_forward_finish);
return NF_STOLEN;
}
static unsigned int br_nf_forward_arp(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct net_bridge_port *p;
struct net_bridge *br;
struct net_device **d = (struct net_device **)(skb->cb);
p = br_port_get_rcu(state->out);
if (p == NULL)
return NF_ACCEPT;
br = p->br;
if (!brnf_call_arptables && !br->nf_call_arptables)
return NF_ACCEPT;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (!IS_ARP(skb)) {
if (!IS_VLAN_ARP(skb))
return NF_ACCEPT;
nf_bridge_pull_encap_header(skb);
}
if (arp_hdr(skb)->ar_pln != 4) {
if (IS_VLAN_ARP(skb))
nf_bridge_push_encap_header(skb);
return NF_ACCEPT;
}
*d = state->in;
2015-09-15 19:04:16 -06:00
NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb,
state->in, state->out, br_nf_forward_finish);
return NF_STOLEN;
}
static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
int err;
data = this_cpu_ptr(&brnf_frag_data_storage);
err = skb_cow_head(skb, data->size);
if (err) {
kfree_skb(skb);
return 0;
}
if (data->vlan_tci) {
skb->vlan_tci = data->vlan_tci;
skb->vlan_proto = data->vlan_proto;
}
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
static int
br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
unsigned int mtu = ip_skb_dst_mtu(sk, skb);
struct iphdr *iph = ip_hdr(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
return ip_do_fragment(net, sk, skb, output);
}
static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
return PPPOE_SES_HLEN;
return 0;
}
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
unsigned int mtu, mtu_reserved;
mtu_reserved = nf_bridge_mtu_reduction(skb);
mtu = skb->dev->mtu;
if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
mtu = nf_bridge->frag_max_size;
if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
skb->protocol == htons(ETH_P_IP)) {
struct brnf_frag_data *data;
if (br_validate_ipv4(net, skb))
goto drop;
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
nf_bridge_update_protocol(skb);
data = this_cpu_ptr(&brnf_frag_data_storage);
data->vlan_tci = skb->vlan_tci;
data->vlan_proto = skb->vlan_proto;
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
}
if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
skb->protocol == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
struct brnf_frag_data *data;
if (br_validate_ipv6(net, skb))
goto drop;
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
nf_bridge_update_protocol(skb);
data = this_cpu_ptr(&brnf_frag_data_storage);
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
if (v6ops)
return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
kfree_skb(skb);
return -EMSGSIZE;
}
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
drop:
kfree_skb(skb);
return 0;
}
/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *realoutdev = bridge_parent(skb->dev);
u_int8_t pf;
/* if nf_bridge is set, but ->physoutdev is NULL, this packet came in
* on a bridge, but was delivered locally and is now being routed:
*
* POST_ROUTING was already invoked from the ip stack.
*/
if (!nf_bridge || !nf_bridge->physoutdev)
return NF_ACCEPT;
if (!realoutdev)
return NF_DROP;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
pf = NFPROTO_IPV4;
bridge: netfilter: don't call iptables on vlan packets if sysctl is off When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets arriving should not be sent to ip(6)tables by bridge netfilter. However, it turns out that we currently always send VLAN packets to netfilter, if .. a), CONFIG_VLAN_8021Q is enabled ; or b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled on the bridge port. This is because bridge netfilter treats skb with skb->protocol == ETH_P_IP{V6} as "non-vlan packet". With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has already been removed here, and we cannot rely on skb->protocol alone. Fix this by only using skb->protocol if the skb has no vlan tag, or if a vlan tag is present and filter-vlan-tagged bridge netfilter sysctl is enabled. We cannot remove the skb->protocol == htons(ETH_P_8021Q) test because the vlan tag is still around in the CONFIG_VLAN_8021Q=n && "ethtool -K $itf rxvlan off" case. reproducer: iptables -t raw -I PREROUTING -i br0 iptables -t raw -I PREROUTING -i br0.1 Then send packets to an ip address configured on br0.1 interface. Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule will match instead of the 2nd one. With this patch applied, the 2nd rule will match instead. In the non-local address case, netfilter won't be consulted after this patch unless the sysctl is switched on. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-03-05 18:22:54 -07:00
else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
pf = NFPROTO_IPV6;
else
return NF_ACCEPT;
/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
* about the value of skb->pkt_type. */
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
}
nf_bridge_pull_encap_header(skb);
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
2015-09-15 19:04:16 -06:00
NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb,
NULL, realoutdev,
br_nf_dev_queue_xmit);
return NF_STOLEN;
}
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
static unsigned int ip_sabotage_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) {
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
return NF_ACCEPT;
}
/* This is called when br_netfilter has called into iptables/netfilter,
* and DNAT has taken place on a bridge-forwarded packet.
*
* neigh->output has created a new MAC header, with local br0 MAC
* as saddr.
*
* This restores the original MAC saddr of the bridged packet
* before invoking bridge forward logic to transmit the packet.
*/
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
nf_bridge->bridged_dnat = 0;
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
nf_bridge->neigh_header,
ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
nf_bridge->physoutdev = NULL;
br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
}
static int br_nf_dev_xmit(struct sk_buff *skb)
{
if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}
return 0;
}
static const struct nf_br_ops br_ops = {
.br_dev_xmit_hook = br_nf_dev_xmit,
};
void br_netfilter_enable(void)
{
}
EXPORT_SYMBOL_GPL(br_netfilter_enable);
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF - 1,
},
{
.hook = br_nf_forward_arp,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_post_routing,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_LAST,
},
{
.hook = ip_sabotage_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_FIRST,
},
{
.hook = ip_sabotage_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST,
},
};
static int brnf_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct brnf_net *brnet;
struct net *net;
int ret;
if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
return NOTIFY_DONE;
ASSERT_RTNL();
net = dev_net(dev);
brnet = net_generic(net, brnf_net_id);
if (brnet->enabled)
return NOTIFY_OK;
ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
if (ret)
return NOTIFY_BAD;
brnet->enabled = true;
return NOTIFY_OK;
}
static void __net_exit brnf_exit_net(struct net *net)
{
struct brnf_net *brnet = net_generic(net, brnf_net_id);
if (!brnet->enabled)
return;
nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
brnet->enabled = false;
}
static struct pernet_operations brnf_net_ops __read_mostly = {
.exit = brnf_exit_net,
.id = &brnf_net_id,
.size = sizeof(struct brnf_net),
};
static struct notifier_block brnf_notifier __read_mostly = {
.notifier_call = brnf_device_event,
};
/* recursively invokes nf_hook_slow (again), skipping already-called
* hooks (< NF_BR_PRI_BRNF).
*
* Called with rcu read lock held.
*/
int br_nf_hook_thresh(unsigned int hook, struct net *net,
struct sock *sk, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct net *, struct sock *,
struct sk_buff *))
{
struct nf_hook_entry *elem;
struct nf_hook_state state;
int ret;
for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
elem = rcu_dereference(elem->next))
;
if (!elem)
return okfn(net, sk, skb);
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, elem);
if (ret == 1)
ret = okfn(net, sk, skb);
return ret;
}
#ifdef CONFIG_SYSCTL
static
int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write && *(int *)(ctl->data))
*(int *)(ctl->data) = 1;
return ret;
}
static struct ctl_table brnf_table[] = {
{
.procname = "bridge-nf-call-arptables",
.data = &brnf_call_arptables,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-call-iptables",
.data = &brnf_call_iptables,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-call-ip6tables",
.data = &brnf_call_ip6tables,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-filter-vlan-tagged",
.data = &brnf_filter_vlan_tagged,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-filter-pppoe-tagged",
.data = &brnf_filter_pppoe_tagged,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-pass-vlan-input-dev",
.data = &brnf_pass_vlan_indev,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{ }
};
#endif
static int __init br_netfilter_init(void)
{
int ret;
ret = register_pernet_subsys(&brnf_net_ops);
if (ret < 0)
return ret;
ret = register_netdevice_notifier(&brnf_notifier);
if (ret < 0) {
unregister_pernet_subsys(&brnf_net_ops);
return ret;
}
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
unregister_netdevice_notifier(&brnf_notifier);
unregister_pernet_subsys(&brnf_net_ops);
return -ENOMEM;
}
#endif
RCU_INIT_POINTER(nf_br_ops, &br_ops);
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
}
static void __exit br_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_br_ops, NULL);
unregister_netdevice_notifier(&brnf_notifier);
unregister_pernet_subsys(&brnf_net_ops);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(brnf_sysctl_header);
#endif
}
module_init(br_netfilter_init);
module_exit(br_netfilter_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");