diff --git a/include/linux/filter.h b/include/linux/filter.h index 5d565c50bcb2..6791a0ac0139 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,7 +543,6 @@ struct bpf_redirect_info { u32 flags; struct bpf_map *map; struct bpf_map *map_to_flush; - unsigned long map_owner; u32 kern_flags; }; @@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_clear_redirect_map(struct bpf_map *map); + static inline bool xdp_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1ecf4c67fcf7..e95cb86b65cf 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -147,9 +147,8 @@ struct _bpf_dtab_netdev { #define devmap_ifindex(fwd, map) \ (!fwd ? 0 : \ - (!map ? 0 : \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ - ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))) + ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 620bc5024d7d..24aac0d0f412 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map) * It does __not__ ensure pending flush operations (if any) are * complete. */ + + bpf_clear_redirect_map(map); synchronize_rcu(); /* To ensure all pending flush operations have completed wait for flush diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index ac1df79f3788..141710b82a6c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map) list_del_rcu(&dtab->list); spin_unlock(&dev_map_lock); + bpf_clear_redirect_map(map); synchronize_rcu(); /* To ensure all pending flush operations have completed wait for flush diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ca90679a7fe5..92246117d2b0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) goto patch_call_imm; } - if (insn->imm == BPF_FUNC_redirect_map) { - /* Note, we cannot use prog directly as imm as subsequent - * rewrites would still change the prog pointer. The only - * stable address we can use is aux, which also works with - * prog clones during blinding. - */ - u64 addr = (unsigned long)prog->aux; - struct bpf_insn r4_ld[] = { - BPF_LD_IMM64(BPF_REG_4, addr), - *insn, - }; - cnt = ARRAY_SIZE(r4_ld); - - new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt); - if (!new_prog) - return -ENOMEM; - - delta += cnt - 1; - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - } patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 4ddf61e158f6..9f8463afda9c 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map) struct xsk_map *m = container_of(map, struct xsk_map, map); int i; + bpf_clear_redirect_map(map); synchronize_net(); for (i = 0; i < map->max_entries; i++) { diff --git a/net/core/filter.c b/net/core/filter.c index fd423ce3da34..c25eb36f1320 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) } } -static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, - unsigned long aux) +void bpf_clear_redirect_map(struct bpf_map *map) { - return (unsigned long)xdp_prog->aux != aux; + struct bpf_redirect_info *ri; + int cpu; + + for_each_possible_cpu(cpu) { + ri = per_cpu_ptr(&bpf_redirect_info, cpu); + /* Avoid polluting remote cacheline due to writes if + * not needed. Once we pass this test, we need the + * cmpxchg() to make sure it hasn't been changed in + * the meantime by remote CPU. + */ + if (unlikely(READ_ONCE(ri->map) == map)) + cmpxchg(&ri->map, map, NULL); + } } static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, struct bpf_map *map) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - unsigned long map_owner = ri->map_owner; - struct bpf_map *map = ri->map; u32 index = ri->ifindex; void *fwd = NULL; int err; ri->ifindex = 0; - ri->map = NULL; - ri->map_owner = 0; - - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } + WRITE_ONCE(ri->map, NULL); fwd = __xdp_map_lookup_elem(map, index); if (!fwd) { @@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); struct net_device *fwd; u32 index = ri->ifindex; int err; - if (ri->map) - return xdp_do_redirect_map(dev, xdp, xdp_prog); + if (map) + return xdp_do_redirect_map(dev, xdp, xdp_prog, map); fwd = dev_get_by_index_rcu(dev_net(dev), index); ri->ifindex = 0; @@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, + struct bpf_map *map) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - unsigned long map_owner = ri->map_owner; - struct bpf_map *map = ri->map; u32 index = ri->ifindex; void *fwd = NULL; int err = 0; ri->ifindex = 0; - ri->map = NULL; - ri->map_owner = 0; + WRITE_ONCE(ri->map, NULL); - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } fwd = __xdp_map_lookup_elem(map, index); if (unlikely(!fwd)) { err = -EINVAL; @@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); u32 index = ri->ifindex; struct net_device *fwd; int err = 0; - if (ri->map) - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog); - + if (map) + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, + map); ri->ifindex = 0; fwd = dev_get_by_index_rcu(dev_net(dev), index); if (unlikely(!fwd)) { @@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; - ri->map = NULL; - ri->map_owner = 0; + WRITE_ONCE(ri->map, NULL); return XDP_REDIRECT; } @@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, - unsigned long, map_owner) +BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, + u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags ri->ifindex = ifindex; ri->flags = flags; - ri->map = map; - ri->map_owner = map_owner; + WRITE_ONCE(ri->map, map); return XDP_REDIRECT; } -/* Note, arg4 is hidden from users and populated by the verifier - * with the right pointer. - */ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { .func = bpf_xdp_redirect_map, .gpl_only = false,