alistair23-linux/samples/bpf/bpf_load.c
Alexei Starovoitov b896c4f95a samples/bpf: Add simple non-portable kprobe filter example
tracex1_kern.c - C program compiled into BPF.

It attaches to kprobe:netif_receive_skb()

When skb->dev->name == "lo", it prints sample debug message into
trace_pipe via bpf_trace_printk() helper function.

tracex1_user.c - corresponding user space component that:
  - loads BPF program via bpf() syscall
  - opens kprobes:netif_receive_skb event via perf_event_open()
    syscall
  - attaches the program to event via ioctl(event_fd,
    PERF_EVENT_IOC_SET_BPF, prog_fd);
  - prints from trace_pipe

Note, this BPF program is non-portable. It must be recompiled
with current kernel headers. kprobe is not a stable ABI and
BPF+kprobe scripts may no longer be meaningful when kernel
internals change.

No matter in what way the kernel changes, neither the kprobe,
nor the BPF program can ever crash or corrupt the kernel,
assuming the kprobes, perf and BPF subsystem has no bugs.

The verifier will detect that the program is using
bpf_trace_printk() and the kernel will print 'this is a DEBUG
kernel' warning banner, which means that bpf_trace_printk()
should be used for debugging of the BPF program only.

Usage:
$ sudo tracex1
            ping-19826 [000] d.s2 63103.382648: : skb ffff880466b1ca00 len 84
            ping-19826 [000] d.s2 63103.382684: : skb ffff880466b1d300 len 84

            ping-19826 [000] d.s2 63104.382533: : skb ffff880466b1ca00 len 84
            ping-19826 [000] d.s2 63104.382594: : skb ffff880466b1d300 len 84

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1427312966-8434-7-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-04-02 13:25:50 +02:00

313 lines
6.8 KiB
C

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdlib.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
#include "libbpf.h"
#include "bpf_helpers.h"
#include "bpf_load.h"
#define DEBUGFS "/sys/kernel/debug/tracing/"
static char license[128];
static int kern_version;
static bool processed_sec[128];
int map_fd[MAX_MAPS];
int prog_fd[MAX_PROGS];
int event_fd[MAX_PROGS];
int prog_cnt;
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
bool is_socket = strncmp(event, "socket", 6) == 0;
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
enum bpf_prog_type prog_type;
char buf[256];
int fd, efd, err, id;
struct perf_event_attr attr = {};
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
if (is_socket) {
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
} else if (is_kprobe || is_kretprobe) {
prog_type = BPF_PROG_TYPE_KPROBE;
} else {
printf("Unknown event '%s'\n", event);
return -1;
}
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
else
event += 10;
snprintf(buf, sizeof(buf),
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
is_kprobe ? 'p' : 'r', event, event);
err = system(buf);
if (err < 0) {
printf("failed to create kprobe '%s' error '%s'\n",
event, strerror(errno));
return -1;
}
}
fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
if (fd < 0) {
printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
return -1;
}
prog_fd[prog_cnt++] = fd;
if (is_socket)
return 0;
strcpy(buf, DEBUGFS);
strcat(buf, "events/kprobes/");
strcat(buf, event);
strcat(buf, "/id");
efd = open(buf, O_RDONLY, 0);
if (efd < 0) {
printf("failed to open event %s\n", event);
return -1;
}
err = read(efd, buf, sizeof(buf));
if (err < 0 || err >= sizeof(buf)) {
printf("read from '%s' failed '%s'\n", event, strerror(errno));
return -1;
}
close(efd);
buf[err] = 0;
id = atoi(buf);
attr.config = id;
efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
if (efd < 0) {
printf("event %d fd %d err %s\n", id, efd, strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
return 0;
}
static int load_maps(struct bpf_map_def *maps, int len)
{
int i;
for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
map_fd[i] = bpf_create_map(maps[i].type,
maps[i].key_size,
maps[i].value_size,
maps[i].max_entries);
if (map_fd[i] < 0)
return 1;
}
return 0;
}
static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
GElf_Shdr *shdr, Elf_Data **data)
{
Elf_Scn *scn;
scn = elf_getscn(elf, i);
if (!scn)
return 1;
if (gelf_getshdr(scn, shdr) != shdr)
return 2;
*shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
if (!*shname || !shdr->sh_size)
return 3;
*data = elf_getdata(scn, 0);
if (!*data || elf_getdata(scn, *data) != NULL)
return 4;
return 0;
}
static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
GElf_Shdr *shdr, struct bpf_insn *insn)
{
int i, nrels;
nrels = shdr->sh_size / shdr->sh_entsize;
for (i = 0; i < nrels; i++) {
GElf_Sym sym;
GElf_Rel rel;
unsigned int insn_idx;
gelf_getrel(data, i, &rel);
insn_idx = rel.r_offset / sizeof(struct bpf_insn);
gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
printf("invalid relo for insn[%d].code 0x%x\n",
insn_idx, insn[insn_idx].code);
return 1;
}
insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
}
return 0;
}
int load_bpf_file(char *path)
{
int fd, i;
Elf *elf;
GElf_Ehdr ehdr;
GElf_Shdr shdr, shdr_prog;
Elf_Data *data, *data_prog, *symbols = NULL;
char *shname, *shname_prog;
if (elf_version(EV_CURRENT) == EV_NONE)
return 1;
fd = open(path, O_RDONLY, 0);
if (fd < 0)
return 1;
elf = elf_begin(fd, ELF_C_READ, NULL);
if (!elf)
return 1;
if (gelf_getehdr(elf, &ehdr) != &ehdr)
return 1;
/* clear all kprobes */
i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
/* scan over all elf sections to get license and map info */
for (i = 1; i < ehdr.e_shnum; i++) {
if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
continue;
if (0) /* helpful for llvm debugging */
printf("section %d:%s data %p size %zd link %d flags %d\n",
i, shname, data->d_buf, data->d_size,
shdr.sh_link, (int) shdr.sh_flags);
if (strcmp(shname, "license") == 0) {
processed_sec[i] = true;
memcpy(license, data->d_buf, data->d_size);
} else if (strcmp(shname, "version") == 0) {
processed_sec[i] = true;
if (data->d_size != sizeof(int)) {
printf("invalid size of version section %zd\n",
data->d_size);
return 1;
}
memcpy(&kern_version, data->d_buf, sizeof(int));
} else if (strcmp(shname, "maps") == 0) {
processed_sec[i] = true;
if (load_maps(data->d_buf, data->d_size))
return 1;
} else if (shdr.sh_type == SHT_SYMTAB) {
symbols = data;
}
}
/* load programs that need map fixup (relocations) */
for (i = 1; i < ehdr.e_shnum; i++) {
if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
continue;
if (shdr.sh_type == SHT_REL) {
struct bpf_insn *insns;
if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
&shdr_prog, &data_prog))
continue;
insns = (struct bpf_insn *) data_prog->d_buf;
processed_sec[shdr.sh_info] = true;
processed_sec[i] = true;
if (parse_relo_and_apply(data, symbols, &shdr, insns))
continue;
if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
memcmp(shname_prog, "kretprobe/", 10) == 0 ||
memcmp(shname_prog, "socket", 6) == 0)
load_and_attach(shname_prog, insns, data_prog->d_size);
}
}
/* load programs that don't use maps */
for (i = 1; i < ehdr.e_shnum; i++) {
if (processed_sec[i])
continue;
if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
continue;
if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "socket", 6) == 0)
load_and_attach(shname, data->d_buf, data->d_size);
}
close(fd);
return 0;
}
void read_trace_pipe(void)
{
int trace_fd;
trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
if (trace_fd < 0)
return;
while (1) {
static char buf[4096];
ssize_t sz;
sz = read(trace_fd, buf, sizeof(buf));
if (sz > 0) {
buf[sz] = 0;
puts(buf);
}
}
}