2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* Linux Socket Filter Data Structures
|
|
|
|
*/
|
|
|
|
#ifndef __LINUX_FILTER_H__
|
|
|
|
#define __LINUX_FILTER_H__
|
|
|
|
|
2011-07-26 17:09:06 -06:00
|
|
|
#include <linux/atomic.h>
|
2012-04-12 15:47:53 -06:00
|
|
|
#include <linux/compat.h>
|
2013-10-04 01:14:06 -06:00
|
|
|
#include <linux/workqueue.h>
|
2012-10-13 03:46:48 -06:00
|
|
|
#include <uapi/linux/filter.h>
|
2011-05-22 01:08:11 -06:00
|
|
|
|
2012-04-12 15:47:53 -06:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
/*
|
|
|
|
* A struct sock_filter is architecture independent.
|
|
|
|
*/
|
|
|
|
struct compat_sock_fprog {
|
|
|
|
u16 len;
|
|
|
|
compat_uptr_t filter; /* struct sock_filter * */
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
struct sock_fprog_kern {
|
|
|
|
u16 len;
|
|
|
|
struct sock_filter *filter;
|
|
|
|
};
|
|
|
|
|
2011-05-22 01:08:11 -06:00
|
|
|
struct sk_buff;
|
|
|
|
struct sock;
|
|
|
|
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
struct sk_filter {
|
2008-04-10 02:33:47 -06:00
|
|
|
atomic_t refcnt;
|
2014-03-28 11:58:18 -06:00
|
|
|
u32 jited:1, /* Is our filter JIT'ed? */
|
|
|
|
len:31; /* Number of filter blocks */
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
struct sock_fprog_kern *orig_prog; /* Original BPF program */
|
2013-10-04 01:14:06 -06:00
|
|
|
struct rcu_head rcu;
|
2011-04-20 03:27:32 -06:00
|
|
|
unsigned int (*bpf_func)(const struct sk_buff *skb,
|
|
|
|
const struct sock_filter *filter);
|
2013-10-04 01:14:06 -06:00
|
|
|
union {
|
|
|
|
struct sock_filter insns[0];
|
|
|
|
struct work_struct work;
|
|
|
|
};
|
2008-04-10 02:33:47 -06:00
|
|
|
};
|
|
|
|
|
2013-10-04 01:14:06 -06:00
|
|
|
static inline unsigned int sk_filter_size(unsigned int proglen)
|
2008-04-10 02:33:47 -06:00
|
|
|
{
|
2013-10-04 01:14:06 -06:00
|
|
|
return max(sizeof(struct sk_filter),
|
|
|
|
offsetof(struct sk_filter, insns[proglen]));
|
2008-04-10 02:33:47 -06:00
|
|
|
}
|
|
|
|
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
#define sk_filter_proglen(fprog) \
|
|
|
|
(fprog->len * sizeof(fprog->filter[0]))
|
|
|
|
|
2014-03-28 11:58:20 -06:00
|
|
|
int sk_filter(struct sock *sk, struct sk_buff *skb);
|
|
|
|
unsigned int sk_run_filter(const struct sk_buff *skb,
|
|
|
|
const struct sock_filter *filter);
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
|
2014-03-28 11:58:20 -06:00
|
|
|
int sk_unattached_filter_create(struct sk_filter **pfp,
|
|
|
|
struct sock_fprog *fprog);
|
|
|
|
void sk_unattached_filter_destroy(struct sk_filter *fp);
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
|
2014-03-28 11:58:20 -06:00
|
|
|
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
|
|
|
|
int sk_detach_filter(struct sock *sk);
|
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly non-trivial reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space w/o the need of a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare us the work in that case.
This approach will simplify and slightly speed up both, sk_get_filter()
and sock_diag_put_filterinfo() handlers as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-28 11:58:19 -06:00
|
|
|
|
2014-03-28 11:58:20 -06:00
|
|
|
int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
|
|
|
|
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
|
|
|
|
unsigned int len);
|
|
|
|
void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
|
|
|
|
|
|
|
|
void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
|
|
|
|
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
|
2011-04-20 03:27:32 -06:00
|
|
|
|
|
|
|
#ifdef CONFIG_BPF_JIT
|
2013-05-01 14:24:08 -06:00
|
|
|
#include <stdarg.h>
|
2013-03-28 09:24:53 -06:00
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <linux/printk.h>
|
|
|
|
|
2014-03-28 11:58:20 -06:00
|
|
|
void bpf_jit_compile(struct sk_filter *fp);
|
|
|
|
void bpf_jit_free(struct sk_filter *fp);
|
2013-03-21 15:22:03 -06:00
|
|
|
|
|
|
|
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
|
|
|
|
u32 pass, void *image)
|
|
|
|
{
|
2013-05-17 10:57:37 -06:00
|
|
|
pr_err("flen=%u proglen=%u pass=%u image=%pK\n",
|
2013-03-21 15:22:03 -06:00
|
|
|
flen, proglen, pass, image);
|
|
|
|
if (image)
|
2013-05-17 10:57:37 -06:00
|
|
|
print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
|
2013-03-21 15:22:03 -06:00
|
|
|
16, 1, image, proglen, false);
|
|
|
|
}
|
2011-04-20 03:27:32 -06:00
|
|
|
#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
|
|
|
|
#else
|
2013-10-04 01:14:06 -06:00
|
|
|
#include <linux/slab.h>
|
2011-04-20 03:27:32 -06:00
|
|
|
static inline void bpf_jit_compile(struct sk_filter *fp)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
static inline void bpf_jit_free(struct sk_filter *fp)
|
|
|
|
{
|
2013-10-04 01:14:06 -06:00
|
|
|
kfree(fp);
|
2011-04-20 03:27:32 -06:00
|
|
|
}
|
|
|
|
#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
|
|
|
|
#endif
|
|
|
|
|
2014-01-17 09:09:45 -07:00
|
|
|
static inline int bpf_tell_extensions(void)
|
|
|
|
{
|
net: filter: let bpf_tell_extensions return SKF_AD_MAX
Michal Sekletar added in commit ea02f9411d9f ("net: introduce
SO_BPF_EXTENSIONS") a facility where user space can enquire
the BPF ancillary instruction set, which is imho a step into
the right direction for letting user space high-level to BPF
optimizers make an informed decision for possibly using these
extensions.
The original rationale was to return through a getsockopt(2)
a bitfield of which instructions are supported and which
are not, as of right now, we just return 0 to indicate a
base support for SKF_AD_PROTOCOL up to SKF_AD_PAY_OFFSET.
Limitations of this approach are that this API which we need
to maintain for a long time can only support a maximum of 32
extensions, and needs to be additionally maintained/updated
when each new extension that comes in.
I thought about this a bit more and what we can do here to
overcome this is to just return SKF_AD_MAX. Since we never
remove any extension since we cannot break user space and
always linearly increase SKF_AD_MAX on each newly added
extension, user space can make a decision on what extensions
are supported in the whole set of extensions and which aren't,
by just checking which of them from the whole set have an
offset < SKF_AD_MAX of the underlying kernel.
Since SKF_AD_MAX must be updated each time we add new ones,
we don't need to introduce an additional enum and got
maintenance for free. At some point in time when
SO_BPF_EXTENSIONS becomes ubiquitous for most kernels, then
an application can simply make use of this and easily be run
on newer or older underlying kernels without needing to be
recompiled, of course. Since that is for 3.14, it's not too
late to do this change.
Cc: Michal Sekletar <msekleta@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Michal Sekletar <msekleta@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-01-20 16:19:37 -07:00
|
|
|
return SKF_AD_MAX;
|
2014-01-17 09:09:45 -07:00
|
|
|
}
|
|
|
|
|
2011-04-20 03:27:32 -06:00
|
|
|
enum {
|
|
|
|
BPF_S_RET_K = 1,
|
|
|
|
BPF_S_RET_A,
|
|
|
|
BPF_S_ALU_ADD_K,
|
|
|
|
BPF_S_ALU_ADD_X,
|
|
|
|
BPF_S_ALU_SUB_K,
|
|
|
|
BPF_S_ALU_SUB_X,
|
|
|
|
BPF_S_ALU_MUL_K,
|
|
|
|
BPF_S_ALU_MUL_X,
|
|
|
|
BPF_S_ALU_DIV_X,
|
2012-09-07 16:03:35 -06:00
|
|
|
BPF_S_ALU_MOD_K,
|
|
|
|
BPF_S_ALU_MOD_X,
|
2011-04-20 03:27:32 -06:00
|
|
|
BPF_S_ALU_AND_K,
|
|
|
|
BPF_S_ALU_AND_X,
|
|
|
|
BPF_S_ALU_OR_K,
|
|
|
|
BPF_S_ALU_OR_X,
|
2012-09-23 20:23:59 -06:00
|
|
|
BPF_S_ALU_XOR_K,
|
|
|
|
BPF_S_ALU_XOR_X,
|
2011-04-20 03:27:32 -06:00
|
|
|
BPF_S_ALU_LSH_K,
|
|
|
|
BPF_S_ALU_LSH_X,
|
|
|
|
BPF_S_ALU_RSH_K,
|
|
|
|
BPF_S_ALU_RSH_X,
|
|
|
|
BPF_S_ALU_NEG,
|
|
|
|
BPF_S_LD_W_ABS,
|
|
|
|
BPF_S_LD_H_ABS,
|
|
|
|
BPF_S_LD_B_ABS,
|
|
|
|
BPF_S_LD_W_LEN,
|
|
|
|
BPF_S_LD_W_IND,
|
|
|
|
BPF_S_LD_H_IND,
|
|
|
|
BPF_S_LD_B_IND,
|
|
|
|
BPF_S_LD_IMM,
|
|
|
|
BPF_S_LDX_W_LEN,
|
|
|
|
BPF_S_LDX_B_MSH,
|
|
|
|
BPF_S_LDX_IMM,
|
|
|
|
BPF_S_MISC_TAX,
|
|
|
|
BPF_S_MISC_TXA,
|
|
|
|
BPF_S_ALU_DIV_K,
|
|
|
|
BPF_S_LD_MEM,
|
|
|
|
BPF_S_LDX_MEM,
|
|
|
|
BPF_S_ST,
|
|
|
|
BPF_S_STX,
|
|
|
|
BPF_S_JMP_JA,
|
|
|
|
BPF_S_JMP_JEQ_K,
|
|
|
|
BPF_S_JMP_JEQ_X,
|
|
|
|
BPF_S_JMP_JGE_K,
|
|
|
|
BPF_S_JMP_JGE_X,
|
|
|
|
BPF_S_JMP_JGT_K,
|
|
|
|
BPF_S_JMP_JGT_X,
|
|
|
|
BPF_S_JMP_JSET_K,
|
|
|
|
BPF_S_JMP_JSET_X,
|
|
|
|
/* Ancillary data */
|
|
|
|
BPF_S_ANC_PROTOCOL,
|
|
|
|
BPF_S_ANC_PKTTYPE,
|
|
|
|
BPF_S_ANC_IFINDEX,
|
|
|
|
BPF_S_ANC_NLATTR,
|
|
|
|
BPF_S_ANC_NLATTR_NEST,
|
|
|
|
BPF_S_ANC_MARK,
|
|
|
|
BPF_S_ANC_QUEUE,
|
|
|
|
BPF_S_ANC_HATYPE,
|
|
|
|
BPF_S_ANC_RXHASH,
|
|
|
|
BPF_S_ANC_CPU,
|
2012-03-31 05:01:20 -06:00
|
|
|
BPF_S_ANC_ALU_XOR_X,
|
2012-04-12 15:47:52 -06:00
|
|
|
BPF_S_ANC_SECCOMP_LD_W,
|
2012-10-26 20:26:17 -06:00
|
|
|
BPF_S_ANC_VLAN_TAG,
|
|
|
|
BPF_S_ANC_VLAN_TAG_PRESENT,
|
filter: add ANC_PAY_OFFSET instruction for loading payload start offset
It is very useful to do dynamic truncation of packets. In particular,
we're interested to push the necessary header bytes to the user space and
cut off user payload that should probably not be transferred for some reasons
(e.g. privacy, speed, or others). With the ancillary extension PAY_OFFSET,
we can load it into the accumulator, and return it. E.g. in bpfc syntax ...
ld #poff ; { 0x20, 0, 0, 0xfffff034 },
ret a ; { 0x16, 0, 0, 0x00000000 },
... as a filter will accomplish this without having to do a big hackery in
a BPF filter itself. Follow-up JIT implementations are welcome.
Thanks to Eric Dumazet for suggesting and discussing this during the
Netfilter Workshop in Copenhagen.
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-03-19 00:39:31 -06:00
|
|
|
BPF_S_ANC_PAY_OFFSET,
|
2011-04-20 03:27:32 -06:00
|
|
|
};
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
#endif /* __LINUX_FILTER_H__ */
|