diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f1010fe759fe..cf17c7932a6e 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
 hostprogs-y += load_sock_ops
 hostprogs-y += xdp_redirect
 hostprogs-y += xdp_redirect_map
+hostprogs-y += xdp_monitor
 hostprogs-y += syscall_tp
 
 # Libbpf dependencies
@@ -83,6 +84,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
 per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
 xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
 xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
+xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 
 # Tell kbuild to always build the programs
@@ -127,6 +129,7 @@ always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
 always += xdp_redirect_kern.o
 always += xdp_redirect_map_kern.o
+always += xdp_monitor_kern.o
 always += syscall_tp_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
@@ -166,6 +169,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
 HOSTLOADLIBES_test_map_in_map += -lelf
 HOSTLOADLIBES_xdp_redirect += -lelf
 HOSTLOADLIBES_xdp_redirect_map += -lelf
+HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
new file mode 100644
index 000000000000..74f3fd8ed729
--- /dev/null
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -0,0 +1,88 @@
+/* XDP monitor tool, based on tracepoints
+ *
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") redirect_err_cnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = 2,
+	/* TODO: have entries for all possible errno's */
+};
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
+ * Code in:           kernel/include/trace/events/xdp.h
+ */
+struct xdp_redirect_ctx {
+	unsigned short common_type;		// offset:0;  size:2; signed:0;
+	unsigned char common_flags;		// offset:2;  size:1; signed:0;
+	unsigned char common_preempt_count;	// offset:3;  size:1; signed:0;
+	int common_pid;				// offset:4;  size:4; signed:1;
+
+	int prog_id;				// offset:8;  size:4; signed:1;
+	u32 act;				// offset:12; size:4; signed:0;
+	int ifindex;				// offset:16; size:4; signed:1;
+	int err;				// offset:20; size:4; signed:1;
+	int to_ifindex;				// offset:24; size:4; signed:1;
+	u32 map_id;				// offset:28; size:4; signed:0;
+	int map_index;				// offset:32; size:4; signed:1;
+};						// offset:36
+
+enum {
+	XDP_REDIRECT_SUCCESS = 0,
+	XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline
+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+{
+	u32 key = XDP_REDIRECT_ERROR;
+	int err = ctx->err;
+	u64 *cnt;
+
+	if (!err)
+		key = XDP_REDIRECT_SUCCESS;
+
+	cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
+	if (!cnt)
+		return 0;
+	*cnt += 1;
+
+	return 0; /* Indicate event was filtered (no further processing) */
+	/*
+	 * Returning 1 here would allow e.g. a perf-record tracepoint
+	 * to see and record these events, but it doesn't work well in
+	 * practice, as stopping perf-record also unloads this
+	 * bpf_prog.  Plus, there is additional overhead of doing so.
+	 */
+}
+
+SEC("tracepoint/xdp/xdp_redirect_err")
+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+SEC("tracepoint/xdp/xdp_redirect_map_err")
+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+/* Likely unloaded when the tool starts (errors-only is the default) */
+SEC("tracepoint/xdp/xdp_redirect")
+int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+/* Likely unloaded when the tool starts (errors-only is the default) */
+SEC("tracepoint/xdp/xdp_redirect_map")
+int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
new file mode 100644
index 000000000000..b51b4f5e3257
--- /dev/null
+++ b/samples/bpf/xdp_monitor_user.c
@@ -0,0 +1,295 @@
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+static const char *__doc__=
+ "XDP monitor tool, based on tracepoints\n"
+;
+
+static const char *__doc_err_only__=
+ " NOTICE: Only tracking XDP redirect errors\n"
+ "         Enable TX success stats via '--stats'\n"
+ "         (which comes with a per packet processing overhead)\n"
+;
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <locale.h>
+
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int verbose = 1;
+static bool debug = false;
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"debug",	no_argument,		NULL, 'D' },
+	{"stats",	no_argument,		NULL, 'S' },
+	{"sec",		required_argument,	NULL, 's' },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf("\nDOCUMENTATION:\n%s\n", __doc__);
+	printf("\n");
+	printf(" Usage: %s (options-see-below)\n", argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-15s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)",
+			       *long_options[i].flag);
+		else
+			printf("(internal short-option: -%c)",
+			       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+__u64 gettime(void)
+{
+	struct timespec t;
+	int res;
+
+	res = clock_gettime(CLOCK_MONOTONIC, &t);
+	if (res < 0) {
+		fprintf(stderr, "Error with clock_gettime! (%i)\n", res);
+		exit(EXIT_FAILURE);
+	}
+	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+enum {
+	REDIR_SUCCESS = 0,
+	REDIR_ERROR = 1,
+};
+#define REDIR_RES_MAX 2
+static const char *redir_names[REDIR_RES_MAX] = {
+	[REDIR_SUCCESS]	= "Success",
+	[REDIR_ERROR]	= "Error",
+};
+static const char *err2str(int err)
+{
+	if (err < REDIR_RES_MAX)
+		return redir_names[err];
+	return NULL;
+}
+
+struct record {
+	__u64 counter;
+	__u64 timestamp;
+};
+
+struct stats_record {
+	struct record xdp_redir[REDIR_RES_MAX];
+};
+
+static void stats_print_headers(bool err_only)
+{
+	if (err_only)
+		printf("\n%s\n", __doc_err_only__);
+
+	printf("%-14s %-10s %-18s %-9s\n",
+	       "XDP_REDIRECT", "pps ", "pps-human-readable", "measure-period");
+}
+
+static void stats_print(struct stats_record *rec,
+			struct stats_record *prev,
+			bool err_only)
+{
+	int i = 0;
+
+	if (err_only)
+		i = REDIR_ERROR;
+
+	for (; i < REDIR_RES_MAX; i++) {
+		struct record *r = &rec->xdp_redir[i];
+		struct record *p = &prev->xdp_redir[i];
+		__u64 period  = 0;
+		__u64 packets = 0;
+		double pps = 0;
+		double period_ = 0;
+
+		if (p->timestamp) {
+			packets = r->counter - p->counter;
+			period  = r->timestamp - p->timestamp;
+			if (period > 0) {
+				period_ = ((double) period / NANOSEC_PER_SEC);
+				pps = packets / period_;
+			}
+		}
+
+		printf("%-14s %-10.0f %'-18.0f %f\n",
+		       err2str(i), pps, pps, period_);
+	}
+}
+
+static __u64 get_key32_value64_percpu(int fd, __u32 key)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus];
+	__u64 sum = 0;
+	int i;
+
+	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return 0;
+	}
+
+	/* Sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		sum += values[i];
+	}
+	return sum;
+}
+
+static bool stats_collect(int fd, struct stats_record *rec)
+{
+	int i;
+
+	/* TODO: Detect if someone unloaded the perf event_fd's, as
+	 * this can happen if someone runs perf-record -e
+	 */
+
+	for (i = 0; i < REDIR_RES_MAX; i++) {
+		rec->xdp_redir[i].timestamp = gettime();
+		rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
+	}
+	return true;
+}
+
+static void stats_poll(int interval, bool err_only)
+{
+	struct stats_record rec, prev;
+	int map_fd;
+
+	memset(&rec, 0, sizeof(rec));
+
+	/* Trick to pretty-print with thousands separators: use %' */
+	setlocale(LC_NUMERIC, "en_US");
+
+	/* Header */
+	if (verbose)
+		printf("\n%s", __doc__);
+
+	/* TODO: Need more advanced stats on error types */
+	if (verbose)
+		printf(" - Stats map: %s\n", map_data[0].name);
+	map_fd = map_data[0].fd;
+
+	stats_print_headers(err_only);
+	fflush(stdout);
+
+	while (1) {
+		memcpy(&prev, &rec, sizeof(rec));
+		stats_collect(map_fd, &rec);
+		stats_print(&rec, &prev, err_only);
+		fflush(stdout);
+		sleep(interval);
+	}
+}
+
+void print_bpf_prog_info(void)
+{
+	int i;
+
+	/* Prog info */
+	printf("Loaded BPF prog has %d bpf program(s)\n", prog_cnt);
+	for (i = 0; i < prog_cnt; i++) {
+		printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
+	}
+
+	/* Maps info */
+	printf("Loaded BPF prog has %d map(s)\n", map_data_count);
+	for (i = 0; i < map_data_count; i++) {
+		char *name = map_data[i].name;
+		int fd     = map_data[i].fd;
+
+		printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
+	}
+
+	/* Event info */
+	printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
+	for (i = 0; i < prog_cnt; i++) {
+		if (event_fd[i] != -1)
+			printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int longindex = 0, opt;
+	int ret = EXIT_SUCCESS;
+	char bpf_obj_file[256];
+
+	/* Default settings: */
+	bool errors_only = true;
+	int interval = 2;
+
+	snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
+
+	/* Parse command line args */
+	while ((opt = getopt_long(argc, argv, "h",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'D':
+			debug = true;
+			break;
+		case 'S':
+			errors_only = false;
+			break;
+		case 's':
+			interval = atoi(optarg);
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (load_bpf_file(bpf_obj_file)) {
+		printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
+		return 1;
+	}
+	if (!prog_fd[0]) {
+		printf("ERROR - load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	if (debug) {
+		print_bpf_prog_info();
+	}
+
+	/* Unload/stop tracepoint events by closing fd's */
+	if (errors_only) {
+		/* The prog_fd[i] and event_fd[i] depend on the
+		 * order the functions were defined in _kern.c
+		 */
+		close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
+		close(prog_fd[2]);  /* func: trace_xdp_redirect */
+		close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
+		close(prog_fd[3]);  /* func: trace_xdp_redirect_map */
+	}
+
+	stats_poll(interval, errors_only);
+
+	return ret;
+}
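
A brief usage sketch (illustrative, not part of the patch): option names come from
long_options[] in xdp_monitor_user.c, and the column header is the one printed by
stats_print_headers(); the build step assumes the usual samples/bpf workflow and may
differ on your tree. Run as root:

    # make samples/bpf/                 (from the kernel source top directory)
    # ./samples/bpf/xdp_monitor --stats --sec 2
    XDP_REDIRECT   pps        pps-human-readable measure-period
    Success        ...        ...                ...
    Error          ...        ...                ...

Without --stats (the default, errors_only == true), main() closes event_fd[2]/[3] and
prog_fd[2]/[3], so only the xdp_redirect_err/xdp_redirect_map_err tracepoint programs
stay attached and only the "Error" row is reported; --stats keeps trace_xdp_redirect
and trace_xdp_redirect_map loaded as well, at the cost of running a bpf_prog for every
successfully redirected packet.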