bpf: btf: Introduce BTF ID
This patch gives an ID to each loaded BTF. The ID is allocated by the idr like the existing prog-id and map-id. The bpf_put(map->btf) is moved to __bpf_map_put() so that the userspace can stop seeing the BTF ID ASAP when the last BTF refcnt is gone. It also makes BTF accessible from userspace through the 1. new BPF_BTF_GET_FD_BY_ID command. It is limited to CAP_SYS_ADMIN which is inline with the BPF_BTF_LOAD cmd and the existing BPF_[MAP|PROG]_GET_FD_BY_ID cmd. 2. new btf_id (and btf_key_id + btf_value_id) in "struct bpf_map_info" Once the BTF ID handler is accessible from userspace, freeing a BTF object has to go through a rcu period. The BPF_BTF_GET_FD_BY_ID cmd can then be done under a rcu_read_lock() instead of taking spin_lock. [Note: A similar rcu usage can be done to the existing bpf_prog_get_fd_by_id() in a follow up patch] When processing the BPF_BTF_GET_FD_BY_ID cmd, refcount_inc_not_zero() is needed because the BTF object could be already in the rcu dead row . btf_get() is removed since its usage is currently limited to btf.c alone. refcount_inc() is used directly instead. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Alexei Starovoitov <ast@fb.com> Acked-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>hifive-unleashed-5.1
parent
82e9697250
commit
78958fca7e
|
@ -44,5 +44,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
|
||||||
u32 *ret_size);
|
u32 *ret_size);
|
||||||
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
|
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
|
||||||
struct seq_file *m);
|
struct seq_file *m);
|
||||||
|
int btf_get_fd_by_id(u32 id);
|
||||||
|
u32 btf_id(const struct btf *btf);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -96,6 +96,7 @@ enum bpf_cmd {
|
||||||
BPF_PROG_QUERY,
|
BPF_PROG_QUERY,
|
||||||
BPF_RAW_TRACEPOINT_OPEN,
|
BPF_RAW_TRACEPOINT_OPEN,
|
||||||
BPF_BTF_LOAD,
|
BPF_BTF_LOAD,
|
||||||
|
BPF_BTF_GET_FD_BY_ID,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_map_type {
|
enum bpf_map_type {
|
||||||
|
@ -344,6 +345,7 @@ union bpf_attr {
|
||||||
__u32 start_id;
|
__u32 start_id;
|
||||||
__u32 prog_id;
|
__u32 prog_id;
|
||||||
__u32 map_id;
|
__u32 map_id;
|
||||||
|
__u32 btf_id;
|
||||||
};
|
};
|
||||||
__u32 next_id;
|
__u32 next_id;
|
||||||
__u32 open_flags;
|
__u32 open_flags;
|
||||||
|
@ -2130,6 +2132,9 @@ struct bpf_map_info {
|
||||||
__u32 ifindex;
|
__u32 ifindex;
|
||||||
__u64 netns_dev;
|
__u64 netns_dev;
|
||||||
__u64 netns_ino;
|
__u64 netns_ino;
|
||||||
|
__u32 btf_id;
|
||||||
|
__u32 btf_key_id;
|
||||||
|
__u32 btf_value_id;
|
||||||
} __attribute__((aligned(8)));
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
|
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
|
||||||
|
|
108
kernel/bpf/btf.c
108
kernel/bpf/btf.c
|
@ -11,6 +11,7 @@
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/idr.h>
|
||||||
#include <linux/bpf_verifier.h>
|
#include <linux/bpf_verifier.h>
|
||||||
#include <linux/btf.h>
|
#include <linux/btf.h>
|
||||||
|
|
||||||
|
@ -179,6 +180,9 @@
|
||||||
i < btf_type_vlen(struct_type); \
|
i < btf_type_vlen(struct_type); \
|
||||||
i++, member++)
|
i++, member++)
|
||||||
|
|
||||||
|
static DEFINE_IDR(btf_idr);
|
||||||
|
static DEFINE_SPINLOCK(btf_idr_lock);
|
||||||
|
|
||||||
struct btf {
|
struct btf {
|
||||||
union {
|
union {
|
||||||
struct btf_header *hdr;
|
struct btf_header *hdr;
|
||||||
|
@ -193,6 +197,8 @@ struct btf {
|
||||||
u32 types_size;
|
u32 types_size;
|
||||||
u32 data_size;
|
u32 data_size;
|
||||||
refcount_t refcnt;
|
refcount_t refcnt;
|
||||||
|
u32 id;
|
||||||
|
struct rcu_head rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum verifier_phase {
|
enum verifier_phase {
|
||||||
|
@ -598,6 +604,42 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int btf_alloc_id(struct btf *btf)
|
||||||
|
{
|
||||||
|
int id;
|
||||||
|
|
||||||
|
idr_preload(GFP_KERNEL);
|
||||||
|
spin_lock_bh(&btf_idr_lock);
|
||||||
|
id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
|
||||||
|
if (id > 0)
|
||||||
|
btf->id = id;
|
||||||
|
spin_unlock_bh(&btf_idr_lock);
|
||||||
|
idr_preload_end();
|
||||||
|
|
||||||
|
if (WARN_ON_ONCE(!id))
|
||||||
|
return -ENOSPC;
|
||||||
|
|
||||||
|
return id > 0 ? 0 : id;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void btf_free_id(struct btf *btf)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In map-in-map, calling map_delete_elem() on outer
|
||||||
|
* map will call bpf_map_put on the inner map.
|
||||||
|
* It will then eventually call btf_free_id()
|
||||||
|
* on the inner map. Some of the map_delete_elem()
|
||||||
|
* implementation may have irq disabled, so
|
||||||
|
* we need to use the _irqsave() version instead
|
||||||
|
* of the _bh() version.
|
||||||
|
*/
|
||||||
|
spin_lock_irqsave(&btf_idr_lock, flags);
|
||||||
|
idr_remove(&btf_idr, btf->id);
|
||||||
|
spin_unlock_irqrestore(&btf_idr_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
static void btf_free(struct btf *btf)
|
static void btf_free(struct btf *btf)
|
||||||
{
|
{
|
||||||
kvfree(btf->types);
|
kvfree(btf->types);
|
||||||
|
@ -607,15 +649,19 @@ static void btf_free(struct btf *btf)
|
||||||
kfree(btf);
|
kfree(btf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void btf_get(struct btf *btf)
|
static void btf_free_rcu(struct rcu_head *rcu)
|
||||||
{
|
{
|
||||||
refcount_inc(&btf->refcnt);
|
struct btf *btf = container_of(rcu, struct btf, rcu);
|
||||||
|
|
||||||
|
btf_free(btf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void btf_put(struct btf *btf)
|
void btf_put(struct btf *btf)
|
||||||
{
|
{
|
||||||
if (btf && refcount_dec_and_test(&btf->refcnt))
|
if (btf && refcount_dec_and_test(&btf->refcnt)) {
|
||||||
btf_free(btf);
|
btf_free_id(btf);
|
||||||
|
call_rcu(&btf->rcu, btf_free_rcu);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int env_resolve_init(struct btf_verifier_env *env)
|
static int env_resolve_init(struct btf_verifier_env *env)
|
||||||
|
@ -2006,10 +2052,15 @@ const struct file_operations btf_fops = {
|
||||||
.release = btf_release,
|
.release = btf_release,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int __btf_new_fd(struct btf *btf)
|
||||||
|
{
|
||||||
|
return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
|
||||||
|
}
|
||||||
|
|
||||||
int btf_new_fd(const union bpf_attr *attr)
|
int btf_new_fd(const union bpf_attr *attr)
|
||||||
{
|
{
|
||||||
struct btf *btf;
|
struct btf *btf;
|
||||||
int fd;
|
int ret;
|
||||||
|
|
||||||
btf = btf_parse(u64_to_user_ptr(attr->btf),
|
btf = btf_parse(u64_to_user_ptr(attr->btf),
|
||||||
attr->btf_size, attr->btf_log_level,
|
attr->btf_size, attr->btf_log_level,
|
||||||
|
@ -2018,12 +2069,23 @@ int btf_new_fd(const union bpf_attr *attr)
|
||||||
if (IS_ERR(btf))
|
if (IS_ERR(btf))
|
||||||
return PTR_ERR(btf);
|
return PTR_ERR(btf);
|
||||||
|
|
||||||
fd = anon_inode_getfd("btf", &btf_fops, btf,
|
ret = btf_alloc_id(btf);
|
||||||
O_RDONLY | O_CLOEXEC);
|
if (ret) {
|
||||||
if (fd < 0)
|
btf_free(btf);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The BTF ID is published to the userspace.
|
||||||
|
* All BTF free must go through call_rcu() from
|
||||||
|
* now on (i.e. free by calling btf_put()).
|
||||||
|
*/
|
||||||
|
|
||||||
|
ret = __btf_new_fd(btf);
|
||||||
|
if (ret < 0)
|
||||||
btf_put(btf);
|
btf_put(btf);
|
||||||
|
|
||||||
return fd;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct btf *btf_get_by_fd(int fd)
|
struct btf *btf_get_by_fd(int fd)
|
||||||
|
@ -2042,7 +2104,7 @@ struct btf *btf_get_by_fd(int fd)
|
||||||
}
|
}
|
||||||
|
|
||||||
btf = f.file->private_data;
|
btf = f.file->private_data;
|
||||||
btf_get(btf);
|
refcount_inc(&btf->refcnt);
|
||||||
fdput(f);
|
fdput(f);
|
||||||
|
|
||||||
return btf;
|
return btf;
|
||||||
|
@ -2062,3 +2124,29 @@ int btf_get_info_by_fd(const struct btf *btf,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int btf_get_fd_by_id(u32 id)
|
||||||
|
{
|
||||||
|
struct btf *btf;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
btf = idr_find(&btf_idr, id);
|
||||||
|
if (!btf || !refcount_inc_not_zero(&btf->refcnt))
|
||||||
|
btf = ERR_PTR(-ENOENT);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
if (IS_ERR(btf))
|
||||||
|
return PTR_ERR(btf);
|
||||||
|
|
||||||
|
fd = __btf_new_fd(btf);
|
||||||
|
if (fd < 0)
|
||||||
|
btf_put(btf);
|
||||||
|
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 btf_id(const struct btf *btf)
|
||||||
|
{
|
||||||
|
return btf->id;
|
||||||
|
}
|
||||||
|
|
|
@ -252,7 +252,6 @@ static void bpf_map_free_deferred(struct work_struct *work)
|
||||||
|
|
||||||
bpf_map_uncharge_memlock(map);
|
bpf_map_uncharge_memlock(map);
|
||||||
security_bpf_map_free(map);
|
security_bpf_map_free(map);
|
||||||
btf_put(map->btf);
|
|
||||||
/* implementation dependent freeing */
|
/* implementation dependent freeing */
|
||||||
map->ops->map_free(map);
|
map->ops->map_free(map);
|
||||||
}
|
}
|
||||||
|
@ -273,6 +272,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
|
||||||
if (atomic_dec_and_test(&map->refcnt)) {
|
if (atomic_dec_and_test(&map->refcnt)) {
|
||||||
/* bpf_map_free_id() must be called first */
|
/* bpf_map_free_id() must be called first */
|
||||||
bpf_map_free_id(map, do_idr_lock);
|
bpf_map_free_id(map, do_idr_lock);
|
||||||
|
btf_put(map->btf);
|
||||||
INIT_WORK(&map->work, bpf_map_free_deferred);
|
INIT_WORK(&map->work, bpf_map_free_deferred);
|
||||||
schedule_work(&map->work);
|
schedule_work(&map->work);
|
||||||
}
|
}
|
||||||
|
@ -2002,6 +2002,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
|
||||||
info.map_flags = map->map_flags;
|
info.map_flags = map->map_flags;
|
||||||
memcpy(info.name, map->name, sizeof(map->name));
|
memcpy(info.name, map->name, sizeof(map->name));
|
||||||
|
|
||||||
|
if (map->btf) {
|
||||||
|
info.btf_id = btf_id(map->btf);
|
||||||
|
info.btf_key_id = map->btf_key_id;
|
||||||
|
info.btf_value_id = map->btf_value_id;
|
||||||
|
}
|
||||||
|
|
||||||
if (bpf_map_is_dev_bound(map)) {
|
if (bpf_map_is_dev_bound(map)) {
|
||||||
err = bpf_map_offload_info_fill(&info, map);
|
err = bpf_map_offload_info_fill(&info, map);
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -2059,6 +2065,19 @@ static int bpf_btf_load(const union bpf_attr *attr)
|
||||||
return btf_new_fd(attr);
|
return btf_new_fd(attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
|
||||||
|
|
||||||
|
static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
|
||||||
|
{
|
||||||
|
if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
return btf_get_fd_by_id(attr->btf_id);
|
||||||
|
}
|
||||||
|
|
||||||
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
||||||
{
|
{
|
||||||
union bpf_attr attr = {};
|
union bpf_attr attr = {};
|
||||||
|
@ -2142,6 +2161,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
|
||||||
case BPF_BTF_LOAD:
|
case BPF_BTF_LOAD:
|
||||||
err = bpf_btf_load(&attr);
|
err = bpf_btf_load(&attr);
|
||||||
break;
|
break;
|
||||||
|
case BPF_BTF_GET_FD_BY_ID:
|
||||||
|
err = bpf_btf_get_fd_by_id(&attr);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue