alistair23-linux/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c

/*
* Copyright (c) 2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mutex.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"
#include "diag/fs_tracepoint.h"
#include "accel/ipsec.h"
#include "fpga/ipsec.h"
#include "eswitch.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
...) {.type = FS_TYPE_PRIO,\
.min_ft_level = min_level_val,\
.num_levels = num_levels_val,\
.num_leaf_prios = num_prios_val,\
.caps = caps_val,\
.children = (struct init_tree_node[]) {__VA_ARGS__},\
.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
__VA_ARGS__)\
#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE, \
.def_miss_action = def_miss_act,\
.children = (struct init_tree_node[]) {__VA_ARGS__},\
.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
sizeof(long))
#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
.caps = (long[]) {__VA_ARGS__} }
#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
FS_CAP(flow_table_properties_nic_receive.modify_root), \
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
#define FS_CHAINING_CAPS_EGRESS \
FS_REQUIRED_CAPS( \
FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \
FS_CAP(flow_table_properties_nic_transmit.modify_root), \
FS_CAP(flow_table_properties_nic_transmit \
.identified_miss_table_mode), \
FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
#define LEFTOVERS_NUM_LEVELS 1
#define LEFTOVERS_NUM_PRIOS 1
#define BY_PASS_PRIO_NUM_LEVELS 1
#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
LEFTOVERS_NUM_PRIOS)
#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
/* Vlan, mac, ttc, inner ttc, aRFS */
#define KERNEL_NIC_PRIO_NUM_LEVELS 5
#define KERNEL_NIC_NUM_PRIOS 1
/* One more level for tc */
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
#define KERNEL_NIC_TC_NUM_PRIOS 1
#define KERNEL_NIC_TC_NUM_LEVELS 2
#define ANCHOR_NUM_LEVELS 1
#define ANCHOR_NUM_PRIOS 1
#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
#define OFFLOADS_MAX_FT 1
#define OFFLOADS_NUM_PRIOS 1
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
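/* How the minimum levels above stack up, derived from the defines in
 * this file (MLX5_BY_PASS_NUM_PRIOS itself is defined in
 * include/linux/mlx5/device.h):
 *
 *	KERNEL_MIN_LEVEL   = KERNEL_NIC_PRIO_NUM_LEVELS + 1       = 6
 *	ETHTOOL_MIN_LEVEL  = KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS = 17
 *	BY_PASS_MIN_LEVEL  = ETHTOOL_MIN_LEVEL +
 *			     MLX5_BY_PASS_NUM_PRIOS + LEFTOVERS_NUM_PRIOS
 *	ANCHOR_MIN_LEVEL   = BY_PASS_MIN_LEVEL + 1
 *	OFFLOADS_MIN_LEVEL = ANCHOR_MIN_LEVEL + 1
 *	LAG_MIN_LEVEL      = OFFLOADS_MIN_LEVEL + 1
 *
 * i.e. each namespace reserves enough levels below it for everything
 * that may chain in front of it.
 */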
struct node_caps {
size_t arr_sz;
long *caps;
};
static struct init_tree_node {
enum fs_node_type type;
struct init_tree_node *children;
int ar_size;
struct node_caps caps;
int min_ft_level;
int num_leaf_prios;
int prio;
int num_levels;
enum mlx5_flow_table_miss_action def_miss_action;
} root_fs = {
.type = FS_TYPE_NAMESPACE,
.ar_size = 7,
.children = (struct init_tree_node[]){
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
LAG_PRIO_NUM_LEVELS))),
ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS,
OFFLOADS_MAX_FT))),
ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
ETHTOOL_PRIO_NUM_LEVELS))),
ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
KERNEL_NIC_TC_NUM_LEVELS),
ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
KERNEL_NIC_PRIO_NUM_LEVELS))),
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS,
LEFTOVERS_NUM_LEVELS))),
ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS,
ANCHOR_NUM_LEVELS))),
}
};
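/* Read as a sketch: the static tree above lays out the NIC RX steering
 * pipeline as a chain of priorities, walked in child-list order:
 *
 *	bypass -> lag -> offloads -> ethtool
 *	       -> kernel (tc + nic) -> leftovers -> anchor
 *
 * Priorities tagged with FS_CHAINING_CAPS are only instantiated when
 * the device advertises the capabilities needed to re-point the root
 * table and chain table misses; the anchor at the end gives leftover
 * traffic a final table to miss into.
 */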
static struct init_tree_node egress_root_fs = {
.type = FS_TYPE_NAMESPACE,
.ar_size = 1,
.children = (struct init_tree_node[]) {
ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
}
};
#define RDMA_RX_BYPASS_PRIO 0
#define RDMA_RX_KERNEL_PRIO 1
static struct init_tree_node rdma_rx_root_fs = {
.type = FS_TYPE_NAMESPACE,
.ar_size = 2,
.children = (struct init_tree_node[]) {
[RDMA_RX_BYPASS_PRIO] =
ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
[RDMA_RX_KERNEL_PRIO] =
ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
FS_CHAINING_CAPS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
ADD_MULTIPLE_PRIO(1, 1))),
}
};
enum fs_i_lock_class {
FS_LOCK_GRANDPARENT,
FS_LOCK_PARENT,
FS_LOCK_CHILD
};
static const struct rhashtable_params rhash_fte = {
.key_len = FIELD_SIZEOF(struct fs_fte, val),
.key_offset = offsetof(struct fs_fte, val),
.head_offset = offsetof(struct fs_fte, hash),
.automatic_shrinking = true,
.min_size = 1,
};
static const struct rhashtable_params rhash_fg = {
.key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
.key_offset = offsetof(struct mlx5_flow_group, mask),
.head_offset = offsetof(struct mlx5_flow_group, hash),
.automatic_shrinking = true,
.min_size = 1,
};
static void del_hw_flow_table(struct fs_node *node);
static void del_hw_flow_group(struct fs_node *node);
static void del_hw_fte(struct fs_node *node);
static void del_sw_flow_table(struct fs_node *node);
static void del_sw_flow_group(struct fs_node *node);
static void del_sw_fte(struct fs_node *node);
static void del_sw_prio(struct fs_node *node);
static void del_sw_ns(struct fs_node *node);
/* Deleting a rule (destination) is a special case that
 * requires locking the FTE for the whole deletion process.
 */
static void del_sw_hw_rule(struct fs_node *node);
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2);
static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
static struct mlx5_flow_rule *
find_flow_rule(struct fs_fte *fte,
struct mlx5_flow_destination *dest);
static void tree_init_node(struct fs_node *node,
void (*del_hw_func)(struct fs_node *),
void (*del_sw_func)(struct fs_node *))
{
refcount_set(&node->refcount, 1);
INIT_LIST_HEAD(&node->list);
INIT_LIST_HEAD(&node->children);
init_rwsem(&node->lock);
node->del_hw_func = del_hw_func;
node->del_sw_func = del_sw_func;
node->active = false;
}
static void tree_add_node(struct fs_node *node, struct fs_node *parent)
{
if (parent)
refcount_inc(&parent->refcount);
node->parent = parent;
/* Parent is the root */
if (!parent)
node->root = node;
else
node->root = parent->root;
}
static int tree_get_node(struct fs_node *node)
{
return refcount_inc_not_zero(&node->refcount);
}
static void nested_down_read_ref_node(struct fs_node *node,
enum fs_i_lock_class class)
{
if (node) {
down_read_nested(&node->lock, class);
refcount_inc(&node->refcount);
}
}
static void nested_down_write_ref_node(struct fs_node *node,
enum fs_i_lock_class class)
{
if (node) {
down_write_nested(&node->lock, class);
refcount_inc(&node->refcount);
}
}
static void down_write_ref_node(struct fs_node *node, bool locked)
{
if (node) {
if (!locked)
down_write(&node->lock);
refcount_inc(&node->refcount);
}
}
static void up_read_ref_node(struct fs_node *node)
{
refcount_dec(&node->refcount);
up_read(&node->lock);
}
static void up_write_ref_node(struct fs_node *node, bool locked)
{
refcount_dec(&node->refcount);
if (!locked)
up_write(&node->lock);
}
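/* Node teardown, as implemented below: once the refcount drops to
 * zero, the HW object is destroyed first (del_hw_func), then the SW
 * state is unlinked and freed (del_sw_func) under the parent's write
 * lock, and finally the reference this node held on its parent is
 * dropped, so a whole subtree can unravel bottom-up.
 */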
static void tree_put_node(struct fs_node *node, bool locked)
{
struct fs_node *parent_node = node->parent;
if (refcount_dec_and_test(&node->refcount)) {
if (node->del_hw_func)
node->del_hw_func(node);
if (parent_node) {
/* Only the root namespace has no parent; for it we
 * just need to free the node.
 */
down_write_ref_node(parent_node, locked);
list_del_init(&node->list);
if (node->del_sw_func)
node->del_sw_func(node);
up_write_ref_node(parent_node, locked);
} else {
kfree(node);
}
node = NULL;
}
if (!node && parent_node)
tree_put_node(parent_node, locked);
}
static int tree_remove_node(struct fs_node *node, bool locked)
{
if (refcount_read(&node->refcount) > 1) {
refcount_dec(&node->refcount);
return -EEXIST;
}
tree_put_node(node, locked);
return 0;
}
static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
unsigned int prio)
{
struct fs_prio *iter_prio;
fs_for_each_prio(iter_prio, ns) {
if (iter_prio->prio == prio)
return iter_prio;
}
return NULL;
}
static bool check_valid_spec(const struct mlx5_flow_spec *spec)
{
int i;
for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
if (spec->match_value[i] & ~spec->match_criteria[i]) {
pr_warn("mlx5_core: match_value differs from match_criteria\n");
return false;
}
return true;
}
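/* A minimal illustration (not part of the driver) of what
 * check_valid_spec() rejects: a match value set for a field whose
 * criteria (mask) bits were never enabled:
 *
 *	struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 *
 *	MLX5_SET(fte_match_param, spec->match_value,
 *		 outer_headers.udp_dport, 4789);
 *
 * With match_criteria left all-zero, the value bits fall outside the
 * mask, so check_valid_spec() warns and returns false.
 */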
static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
struct fs_node *root;
struct mlx5_flow_namespace *ns;
root = node->root;
if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
return NULL;
}
ns = container_of(root, struct mlx5_flow_namespace, node);
return container_of(ns, struct mlx5_flow_root_namespace, ns);
}
static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
{
struct mlx5_flow_root_namespace *root = find_root(node);
if (root)
return root->dev->priv.steering;
return NULL;
}
static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
{
struct mlx5_flow_root_namespace *root = find_root(node);
if (root)
return root->dev;
return NULL;
}
static void del_sw_ns(struct fs_node *node)
{
kfree(node);
}
static void del_sw_prio(struct fs_node *node)
{
kfree(node);
}
static void del_hw_flow_table(struct fs_node *node)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
int err;
fs_get_obj(ft, node);
dev = get_dev(&ft->node);
root = find_root(&ft->node);
trace_mlx5_fs_del_ft(ft);
if (node->active) {
err = root->cmds->destroy_flow_table(root, ft);
if (err)
mlx5_core_warn(dev, "flow steering can't destroy ft\n");
}
}
static void del_sw_flow_table(struct fs_node *node)
{
struct mlx5_flow_table *ft;
struct fs_prio *prio;
fs_get_obj(ft, node);
rhltable_destroy(&ft->fgs_hash);
fs_get_obj(prio, ft->node.parent);
prio->num_ft--;
kfree(ft);
}
static void modify_fte(struct fs_fte *fte)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct mlx5_core_dev *dev;
int err;
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
dev = get_dev(&fte->node);
root = find_root(&ft->node);
err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte);
if (err)
mlx5_core_warn(dev,
"%s can't del rule fg id=%d fte_index=%d\n",
__func__, fg->id, fte->index);
fte->modify_mask = 0;
}
static void del_sw_hw_rule(struct fs_node *node)
{
struct mlx5_flow_rule *rule;
struct fs_fte *fte;
fs_get_obj(rule, node);
fs_get_obj(fte, rule->node.parent);
trace_mlx5_fs_del_rule(rule);
if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
mutex_lock(&rule->dest_attr.ft->lock);
list_del(&rule->next_ft);
mutex_unlock(&rule->dest_attr.ft->lock);
}
if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
--fte->dests_size) {
fte->modify_mask |=
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
goto out;
}
if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
--fte->dests_size) {
fte->modify_mask |=
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
}
out:
kfree(rule);
}
static void del_hw_fte(struct fs_node *node)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct mlx5_core_dev *dev;
struct fs_fte *fte;
int err;
fs_get_obj(fte, node);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
trace_mlx5_fs_del_fte(fte);
dev = get_dev(&ft->node);
root = find_root(&ft->node);
if (node->active) {
err = root->cmds->delete_fte(root, ft, fte);
if (err)
mlx5_core_warn(dev,
"flow steering can't delete fte in index %d of flow group id %d\n",
fte->index, fg->id);
node->active = 0;
}
}
static void del_sw_fte(struct fs_node *node)
{
struct mlx5_flow_steering *steering = get_steering(node);
struct mlx5_flow_group *fg;
struct fs_fte *fte;
int err;
fs_get_obj(fte, node);
fs_get_obj(fg, fte->node.parent);
err = rhashtable_remove_fast(&fg->ftes_hash,
&fte->hash,
rhash_fte);
WARN_ON(err);
ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
kmem_cache_free(steering->ftes_cache, fte);
}
static void del_hw_flow_group(struct fs_node *node)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_group *fg;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
fs_get_obj(fg, node);
fs_get_obj(ft, fg->node.parent);
dev = get_dev(&ft->node);
trace_mlx5_fs_del_fg(fg);
root = find_root(&ft->node);
if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg))
mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
fg->id, ft->id);
}
static void del_sw_flow_group(struct fs_node *node)
{
struct mlx5_flow_steering *steering = get_steering(node);
struct mlx5_flow_group *fg;
struct mlx5_flow_table *ft;
int err;
fs_get_obj(fg, node);
fs_get_obj(ft, fg->node.parent);
rhashtable_destroy(&fg->ftes_hash);
ida_destroy(&fg->fte_allocator);
if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
ft->autogroup.num_groups--;
err = rhltable_remove(&ft->fgs_hash,
&fg->hash,
rhash_fg);
WARN_ON(err);
kmem_cache_free(steering->fgs_cache, fg);
}
static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
{
int index;
int ret;
index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
if (index < 0)
return index;
fte->index = index + fg->start_index;
ret = rhashtable_insert_fast(&fg->ftes_hash,
&fte->hash,
rhash_fte);
if (ret)
goto err_ida_remove;
tree_add_node(&fte->node, &fg->node);
list_add_tail(&fte->node.list, &fg->node.children);
return 0;
err_ida_remove:
ida_simple_remove(&fg->fte_allocator, index);
return ret;
}
static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
struct fs_fte *fte;
fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
if (!fte)
return ERR_PTR(-ENOMEM);
memcpy(fte->val, &spec->match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->action = *flow_act;
fte->flow_context = spec->flow_context;
tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
return fte;
}
static void dealloc_flow_group(struct mlx5_flow_steering *steering,
struct mlx5_flow_group *fg)
{
rhashtable_destroy(&fg->ftes_hash);
kmem_cache_free(steering->fgs_cache, fg);
}
static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
u8 match_criteria_enable,
const void *match_criteria,
int start_index,
int end_index)
{
struct mlx5_flow_group *fg;
int ret;
fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
if (!fg)
return ERR_PTR(-ENOMEM);
ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
if (ret) {
kmem_cache_free(steering->fgs_cache, fg);
return ERR_PTR(ret);
}
ida_init(&fg->fte_allocator);
fg->mask.match_criteria_enable = match_criteria_enable;
memcpy(&fg->mask.match_criteria, match_criteria,
sizeof(fg->mask.match_criteria));
fg->node.type = FS_TYPE_FLOW_GROUP;
fg->start_index = start_index;
fg->max_ftes = end_index - start_index + 1;
return fg;
}
static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
const void *match_criteria,
int start_index,
int end_index,
struct list_head *prev)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
struct mlx5_flow_group *fg;
int ret;
fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
start_index, end_index);
if (IS_ERR(fg))
return fg;
/* initialize refcnt, add to parent list */
ret = rhltable_insert(&ft->fgs_hash,
&fg->hash,
rhash_fg);
if (ret) {
dealloc_flow_group(steering, fg);
return ERR_PTR(ret);
}
tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
tree_add_node(&fg->node, &ft->node);
/* Add node to group list */
list_add(&fg->node.list, prev);
atomic_inc(&ft->node.version);
return fg;
}
static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
enum fs_flow_table_type table_type,
enum fs_flow_table_op_mod op_mod,
u32 flags)
{
struct mlx5_flow_table *ft;
int ret;
ft = kzalloc(sizeof(*ft), GFP_KERNEL);
if (!ft)
return ERR_PTR(-ENOMEM);
ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
if (ret) {
kfree(ft);
return ERR_PTR(ret);
}
ft->level = level;
ft->node.type = FS_TYPE_FLOW_TABLE;
ft->op_mod = op_mod;
ft->type = table_type;
ft->vport = vport;
ft->max_fte = max_fte;
ft->flags = flags;
INIT_LIST_HEAD(&ft->fwd_rules);
mutex_init(&ft->lock);
return ft;
}
/* If reverse is false, then we search for the first flow table in the
 * root sub-tree from start (closest from the right); else we search
 * for the last flow table in the root sub-tree up to start (closest
 * from the left).
 */
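/* For example (sketch), with this sub-tree and reverse == false,
 * starting the walk at prio1, the first table found is FT2:
 *
 *	root
 *	 +-- prio0 -- FT0, FT1
 *	 +-- prio1              <-- start
 *	 +-- prio2 -- FT2, FT3
 *
 * With reverse == true the walk goes left instead and returns FT1.
 * Sub-trees of type FS_TYPE_PRIO_CHAINS are skipped entirely.
 */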
static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
struct list_head *start,
bool reverse)
{
#define list_advance_entry(pos, reverse) \
((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
#define list_for_each_advance_continue(pos, head, reverse) \
for (pos = list_advance_entry(pos, reverse); \
&pos->list != (head); \
pos = list_advance_entry(pos, reverse))
struct fs_node *iter = list_entry(start, struct fs_node, list);
struct mlx5_flow_table *ft = NULL;
if (!root || root->type == FS_TYPE_PRIO_CHAINS)
return NULL;
list_for_each_advance_continue(iter, &root->children, reverse) {
if (iter->type == FS_TYPE_FLOW_TABLE) {
fs_get_obj(ft, iter);
return ft;
}
ft = find_closest_ft_recursive(iter, &iter->children, reverse);
if (ft)
return ft;
}
return ft;
}
/* If reverse is false then return the first flow table in the next
 * priority of prio in the tree, else return the last flow table in
 * the previous priority of prio in the tree.
 */
static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
{
struct mlx5_flow_table *ft = NULL;
struct fs_node *curr_node;
struct fs_node *parent;
parent = prio->node.parent;
curr_node = &prio->node;
while (!ft && parent) {
ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
curr_node = parent;
parent = curr_node->parent;
}
return ft;
}
/* Assumes the whole tree is locked by the chain mutex */
static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
{
return find_closest_ft(prio, false);
}
/* Assumes the whole tree is locked by the chain mutex */
static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
{
return find_closest_ft(prio, true);
}
static int connect_fts_in_prio(struct mlx5_core_dev *dev,
struct fs_prio *prio,
struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_flow_table *iter;
int err;
fs_for_each_ft(iter, prio) {
err = root->cmds->modify_flow_table(root, iter, ft);
if (err) {
mlx5_core_err(dev,
"Failed to modify flow table id %d, type %d, err %d\n",
iter->id, iter->type, err);
/* The driver is out of sync with the FW */
return err;
}
}
return 0;
}
/* Connect flow tables from previous priority of prio to ft */
static int connect_prev_fts(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
struct mlx5_flow_table *prev_ft;
prev_ft = find_prev_chained_ft(prio);
if (prev_ft) {
struct fs_prio *prev_prio;
fs_get_obj(prev_prio, prev_ft->node.parent);
return connect_fts_in_prio(dev, prev_prio, ft);
}
return 0;
}
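/* update_root_ft_create() below re-points the root flow table when a
 * newly created table sits at a shallower level than the current
 * root. If underlay QPNs are registered (e.g. by IPoIB), the root
 * update command is issued once per QPN; with an empty list a single
 * update with QPN 0 is sent.
 */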
static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
*prio)
{
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_ft_underlay_qp *uqp;
int min_level = INT_MAX;
int err = 0;
u32 qpn;
if (root->root_ft)
min_level = root->root_ft->level;
if (ft->level >= min_level)
return 0;
if (list_empty(&root->underlay_qpns)) {
/* Don't set any QPN (zero) in case QPN list is empty */
qpn = 0;
err = root->cmds->update_root_ft(root, ft, qpn, false);
} else {
list_for_each_entry(uqp, &root->underlay_qpns, list) {
qpn = uqp->qpn;
err = root->cmds->update_root_ft(root, ft,
qpn, false);
if (err)
break;
}
}
if (err)
mlx5_core_warn(root->dev,
"Update root flow table of id(%u) qpn(%d) failed\n",
ft->id, qpn);
else
root->root_ft = ft;
return err;
}
static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct fs_fte *fte;
int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
int err = 0;
fs_get_obj(fte, rule->node.parent);
if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return -EINVAL;
down_write_ref_node(&fte->node, false);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
memcpy(&rule->dest_attr, dest, sizeof(*dest));
root = find_root(&ft->node);
err = root->cmds->update_fte(root, ft, fg,
modify_mask, fte);
up_write_ref_node(&fte->node, false);
return err;
}
int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
struct mlx5_flow_destination *new_dest,
struct mlx5_flow_destination *old_dest)
{
int i;
if (!old_dest) {
if (handle->num_rules != 1)
return -EINVAL;
return _mlx5_modify_rule_destination(handle->rule[0],
new_dest);
}
for (i = 0; i < handle->num_rules; i++) {
if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
return _mlx5_modify_rule_destination(handle->rule[i],
new_dest);
}
return -EINVAL;
}
/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */
static int connect_fwd_rules(struct mlx5_core_dev *dev,
struct mlx5_flow_table *new_next_ft,
struct mlx5_flow_table *old_next_ft)
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_rule *iter;
int err = 0;
/* new_next_ft and old_next_ft could be NULL only
* when we create/destroy the anchor flow table.
*/
if (!new_next_ft || !old_next_ft)
return 0;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = new_next_ft;
mutex_lock(&old_next_ft->lock);
list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
mutex_unlock(&old_next_ft->lock);
list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
err = _mlx5_modify_rule_destination(iter, &dest);
if (err)
pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
new_next_ft->id);
}
return 0;
}
static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
struct mlx5_flow_table *next_ft;
int err = 0;
/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
if (list_empty(&prio->node.children)) {
err = connect_prev_fts(dev, ft, prio);
if (err)
return err;
next_ft = find_next_chained_ft(prio);
err = connect_fwd_rules(dev, ft, next_ft);
if (err)
return err;
}
if (MLX5_CAP_FLOWTABLE(dev,
flow_table_properties_nic_receive.modify_root))
err = update_root_ft_create(ft, prio);
return err;
}
static void list_add_flow_table(struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
struct list_head *prev = &prio->node.children;
struct mlx5_flow_table *iter;
fs_for_each_ft(iter, prio) {
if (iter->level > ft->level)
break;
prev = &iter->node.list;
}
list_add(&ft->node.list, prev);
}
static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr,
enum fs_flow_table_op_mod op_mod,
u16 vport)
{
struct mlx5_flow_root_namespace *root = find_root(&ns->node);
struct mlx5_flow_table *next_ft = NULL;
struct fs_prio *fs_prio = NULL;
struct mlx5_flow_table *ft;
int log_table_sz;
int err;
if (!root) {
pr_err("mlx5: flow steering failed to find root of namespace\n");
return ERR_PTR(-ENODEV);
}
mutex_lock(&root->chain_lock);
fs_prio = find_prio(ns, ft_attr->prio);
if (!fs_prio) {
err = -EINVAL;
goto unlock_root;
}
if (ft_attr->level >= fs_prio->num_levels) {
err = -ENOSPC;
goto unlock_root;
}
/* The caller's level is an offset within the
 * priority's level range.
 */
ft_attr->level += fs_prio->start_level;
ft = alloc_flow_table(ft_attr->level,
vport,
ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
root->table_type,
op_mod, ft_attr->flags);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto unlock_root;
}
tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
next_ft = find_next_chained_ft(fs_prio);
ft->def_miss_action = ns->def_miss_action;
err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
if (err)
goto free_ft;
err = connect_flow_table(root->dev, ft, fs_prio);
if (err)
goto destroy_ft;
ft->node.active = true;
down_write_ref_node(&fs_prio->node, false);
tree_add_node(&ft->node, &fs_prio->node);
list_add_flow_table(ft, fs_prio);
fs_prio->num_ft++;
up_write_ref_node(&fs_prio->node, false);
mutex_unlock(&root->chain_lock);
trace_mlx5_fs_add_ft(ft);
return ft;
destroy_ft:
root->cmds->destroy_flow_table(root, ft);
free_ft:
kfree(ft);
unlock_root:
mutex_unlock(&root->chain_lock);
return ERR_PTR(err);
}
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr)
{
return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
}
struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
int prio, int max_fte,
u32 level, u16 vport)
{
struct mlx5_flow_table_attr ft_attr = {};
ft_attr.max_fte = max_fte;
ft_attr.level = level;
ft_attr.prio = prio;
return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
}
struct mlx5_flow_table*
mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
int prio, u32 level)
{
struct mlx5_flow_table_attr ft_attr = {};
ft_attr.level = level;
ft_attr.prio = prio;
return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
}
EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
struct mlx5_flow_table*
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
int max_num_groups,
u32 level,
u32 flags)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
if (max_num_groups > num_flow_table_entries)
return ERR_PTR(-EINVAL);
ft_attr.max_fte = num_flow_table_entries;
ft_attr.prio = prio;
ft_attr.level = level;
ft_attr.flags = flags;
ft = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
return ft;
ft->autogroup.active = true;
ft->autogroup.required_groups = max_num_groups;
/* Reserve room for one extra flow group beyond max_num_groups */
ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
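/* A usage sketch (illustrative only, namespace and sizes are made up)
 * for the auto-grouped API above: the caller picks a namespace and
 * sizes the table, and flow groups are then carved out on demand as
 * rules with new match criteria are added:
 *
 *	struct mlx5_flow_namespace *ns =
 *		mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_KERNEL);
 *	struct mlx5_flow_table *ft =
 *		mlx5_create_auto_grouped_flow_table(ns, 0, 1024, 16, 0, 0);
 *
 *	if (IS_ERR(ft))
 *		return PTR_ERR(ft);
 *
 * Each autogroup then holds max_fte / (max_num_groups + 1) entries.
 */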
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
fg_in, match_criteria);
u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
fg_in,
match_criteria_enable);
int start_index = MLX5_GET(create_flow_group_in, fg_in,
start_flow_index);
int end_index = MLX5_GET(create_flow_group_in, fg_in,
end_flow_index);
struct mlx5_flow_group *fg;
int err;
if (ft->autogroup.active)
return ERR_PTR(-EPERM);
down_write_ref_node(&ft->node, false);
fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
start_index, end_index,
ft->node.children.prev);
up_write_ref_node(&ft->node, false);
if (IS_ERR(fg))
return fg;
err = root->cmds->create_flow_group(root, ft, fg_in, fg);
if (err) {
tree_put_node(&fg->node, false);
return ERR_PTR(err);
}
trace_mlx5_fs_add_fg(fg);
fg->node.active = true;
return fg;
}
static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
{
struct mlx5_flow_rule *rule;
rule = kzalloc(sizeof(*rule), GFP_KERNEL);
if (!rule)
return NULL;
INIT_LIST_HEAD(&rule->next_ft);
rule->node.type = FS_TYPE_FLOW_DEST;
if (dest)
memcpy(&rule->dest_attr, dest, sizeof(*dest));
return rule;
}
static struct mlx5_flow_handle *alloc_handle(int num_rules)
{
struct mlx5_flow_handle *handle;
handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
if (!handle)
return NULL;
handle->num_rules = num_rules;
return handle;
}
static void destroy_flow_handle(struct fs_fte *fte,
struct mlx5_flow_handle *handle,
struct mlx5_flow_destination *dest,
int i)
{
for (; --i >= 0;) {
if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
fte->dests_size--;
list_del(&handle->rule[i]->node.list);
kfree(handle->rule[i]);
}
}
kfree(handle);
}
static struct mlx5_flow_handle *
create_flow_handle(struct fs_fte *fte,
struct mlx5_flow_destination *dest,
int dest_num,
int *modify_mask,
bool *new_rule)
{
struct mlx5_flow_handle *handle;
struct mlx5_flow_rule *rule = NULL;
static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
int type;
int i = 0;
handle = alloc_handle((dest_num) ? dest_num : 1);
if (!handle)
return ERR_PTR(-ENOMEM);
do {
if (dest) {
rule = find_flow_rule(fte, dest + i);
if (rule) {
refcount_inc(&rule->node.refcount);
goto rule_found;
}
}
*new_rule = true;
rule = alloc_rule(dest + i);
if (!rule)
goto free_rules;
/* Add dest to the dests list; flow-table destinations must be
 * at the end of the list so forward-to-next-prio rules work.
 */
tree_init_node(&rule->node, NULL, del_sw_hw_rule);
if (dest &&
dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
list_add(&rule->node.list, &fte->node.children);
else
list_add_tail(&rule->node.list, &fte->node.children);
if (dest) {
fte->dests_size++;
type = dest[i].type ==
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
*modify_mask |= type ? count : dst;
}
rule_found:
handle->rule[i] = rule;
} while (++i < dest_num);
return handle;
free_rules:
destroy_flow_handle(fte, handle, dest, i);
return ERR_PTR(-ENOMEM);
}
/* fte should not be deleted while calling this function */
static struct mlx5_flow_handle *
add_rule_fte(struct fs_fte *fte,
struct mlx5_flow_group *fg,
struct mlx5_flow_destination *dest,
int dest_num,
bool update_action)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_handle *handle;
struct mlx5_flow_table *ft;
int modify_mask = 0;
int err;
bool new_rule = false;
handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
&new_rule);
if (IS_ERR(handle) || !new_rule)
goto out;
if (update_action)
modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
fs_get_obj(ft, fg->node.parent);
root = find_root(&fg->node);
if (!(fte->status & FS_FTE_STATUS_EXISTING))
err = root->cmds->create_fte(root, ft, fg, fte);
else
err = root->cmds->update_fte(root, ft, fg, modify_mask, fte);
if (err)
goto free_handle;
fte->node.active = true;
fte->status |= FS_FTE_STATUS_EXISTING;
atomic_inc(&fte->node.version);
out:
return handle;
free_handle:
destroy_flow_handle(fte, handle, dest, handle->num_rules);
return ERR_PTR(err);
}
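/* alloc_auto_flow_group() below scans the group list (kept sorted by
 * start_index) for the first hole large enough for a new group. For
 * example, with group_size 10 and existing groups covering FTE ranges
 * [0,9] and [10,19], the candidate index settles at 20, accepted as
 * long as 20 + 10 does not exceed ft->max_fte.
 */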
static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec)
{
struct list_head *prev = &ft->node.children;
struct mlx5_flow_group *fg;
unsigned int candidate_index = 0;
unsigned int group_size = 0;
if (!ft->autogroup.active)
return ERR_PTR(-ENOENT);
if (ft->autogroup.num_groups < ft->autogroup.required_groups)
group_size = ft->autogroup.group_size;
/* ft->max_fte == ft->autogroup.max_types */
if (group_size == 0)
group_size = 1;
/* sorted by start_index */
fs_for_each_fg(fg, ft) {
if (candidate_index + group_size > fg->start_index)
candidate_index = fg->start_index + fg->max_ftes;
else
break;
prev = &fg->node.list;
}
if (candidate_index + group_size > ft->max_fte)
return ERR_PTR(-ENOSPC);
fg = alloc_insert_flow_group(ft,
spec->match_criteria_enable,
spec->match_criteria,
candidate_index,
candidate_index + group_size - 1,
prev);
if (IS_ERR(fg))
goto out;
if (group_size == ft->autogroup.group_size)
ft->autogroup.num_groups++;
out:
return fg;
}
static int create_auto_flow_group(struct mlx5_flow_table *ft,
struct mlx5_flow_group *fg)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
void *match_criteria_addr;
u8 src_esw_owner_mask_on;
void *misc;
int err;
u32 *in;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_flow_group_in, in, match_criteria_enable,
fg->mask.match_criteria_enable);
MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index +
fg->max_ftes - 1);
misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
misc_parameters);
src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
source_eswitch_owner_vhca_id);
MLX5_SET(create_flow_group_in, in,
source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
in, match_criteria);
memcpy(match_criteria_addr, fg->mask.match_criteria,
sizeof(fg->mask.match_criteria));
err = root->cmds->create_flow_group(root, ft, in, fg);
if (!err) {
fg->node.active = true;
trace_mlx5_fs_add_fg(fg);
}
kvfree(in);
return err;
}
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
if (d1->type == d2->type) {
if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
d1->vport.num == d2->vport.num &&
d1->vport.flags == d2->vport.flags &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
(d1->vport.pkt_reformat->id ==
d2->vport.pkt_reformat->id) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
d1->tir_num == d2->tir_num) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
d1->ft_num == d2->ft_num))
return true;
}
return false;
}
static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_rule *rule;
list_for_each_entry(rule, &fte->node.children, node.list) {
if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
return rule;
}
return NULL;
}
static bool check_conflicting_actions(u32 action1, u32 action2)
{
u32 xored_actions = action1 ^ action2;
/* if one rule only wants to count, it's ok */
if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
return false;
if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
MLX5_FLOW_CONTEXT_ACTION_DECAP |
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
return true;
return false;
}
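/* Concretely (sketch): two rules landing on the same FTE may both
 * count (MLX5_FLOW_CONTEXT_ACTION_COUNT alone never conflicts), but a
 * DROP cannot be merged with a rule that does not DROP, nor may the
 * packet-reformat, decap, mod-hdr or VLAN push/pop actions differ
 * between the two.
 */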
static int check_conflicting_ftes(struct fs_fte *fte,
const struct mlx5_flow_context *flow_context,
const struct mlx5_flow_act *flow_act)
{
if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
"Found two FTEs with conflicting actions\n");
return -EEXIST;
}
if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
fte->flow_context.flow_tag != flow_context->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
fte->flow_context.flow_tag,
flow_context->flow_tag);
return -EEXIST;
}
return 0;
}
static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
struct fs_fte *fte)
{
struct mlx5_flow_handle *handle;
int old_action;
int i;
int ret;
ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
if (ret)
return ERR_PTR(ret);
old_action = fte->action.action;
fte->action.action |= flow_act->action;
handle = add_rule_fte(fte, fg, dest, dest_num,
old_action != flow_act->action);
if (IS_ERR(handle)) {
fte->action.action = old_action;
return handle;
}
trace_mlx5_fs_set_fte(fte, false);
for (i = 0; i < handle->num_rules; i++) {
if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
}
return handle;
}
static bool counter_is_valid(u32 action)
{
return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}
static bool dest_is_valid(struct mlx5_flow_destination *dest,
u32 action,
struct mlx5_flow_table *ft)
{
if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
return counter_is_valid(action);
if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return true;
if (!dest || ((dest->type ==
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
(dest->ft->level <= ft->level)))
return false;
return true;
}
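/* Note on the check above: a counter destination is only valid when
 * the action also includes DROP or FWD_DEST, and a forward to another
 * flow table is rejected unless the target table sits at a strictly
 * deeper level than the source table, which rules out steering loops.
 */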
struct match_list {
struct list_head list;
struct mlx5_flow_group *g;
};
struct match_list_head {
struct list_head list;
struct match_list first;
};
static void free_match_list(struct match_list_head *head, bool ft_locked)
{
if (!list_empty(&head->list)) {
struct match_list *iter, *match_tmp;
list_del(&head->first.list);
tree_put_node(&head->first.g->node, ft_locked);
list_for_each_entry_safe(iter, match_tmp, &head->list,
list) {
tree_put_node(&iter->g->node, ft_locked);
list_del(&iter->list);
kfree(iter);
}
}
}
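/* Walk the flow table's flow group hash under RCU and collect every
 * group whose match_criteria matches @spec into @match_head, taking a
 * tree reference on each collected group. On allocation failure the
 * partial list is released via free_match_list() with @ft_locked
 * propagated so tree_put_node() does not re-take a flow table lock the
 * caller already holds.
 */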
static int build_match_list(struct match_list_head *match_head,
struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec,
bool ft_locked)
{
struct rhlist_head *tmp, *list;
struct mlx5_flow_group *g;
int err = 0;
rcu_read_lock();
INIT_LIST_HEAD(&match_head->list);
/* Collect all fgs which have a matching match_criteria */
list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
/* The RCU read side is atomic; we can't execute FW commands here */
rhl_for_each_entry_rcu(g, tmp, list, hash) {
struct match_list *curr_match;
if (likely(list_empty(&match_head->list))) {
if (!tree_get_node(&g->node))
continue;
match_head->first.g = g;
list_add_tail(&match_head->first.list,
&match_head->list);
continue;
}
curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
if (!curr_match) {
free_match_list(match_head, ft_locked);
err = -ENOMEM;
goto out;
}
if (!tree_get_node(&g->node)) {
kfree(curr_match);
continue;
}
curr_match->g = g;
list_add_tail(&curr_match->list, &match_head->list);
}
out:
rcu_read_unlock();
return err;
}
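/* Sum the version counters of all matched flow groups. The sum is
 * sampled before and after the unlocked search in
 * try_add_to_existing_fg() to detect concurrent changes to any of the
 * groups.
 */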
static u64 matched_fgs_get_version(struct list_head *match_head)
{
struct match_list *iter;
u64 version = 0;
list_for_each_entry(iter, match_head, list)
version += (u64)atomic_read(&iter->g->node.version);
return version;
}
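/* Look up an FTE by match value inside group @g while holding the
 * group's lock (read or write, per @take_write). On success the FTE is
 * returned with an elevated refcount and its node write-locked;
 * inactive FTEs are skipped so a concurrent deletion is not
 * resurrected.
 */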
static struct fs_fte *
lookup_fte_locked(struct mlx5_flow_group *g,
const u32 *match_value,
bool take_write)
{
struct fs_fte *fte_tmp;
if (take_write)
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
else
nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
rhash_fte);
if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
fte_tmp = NULL;
goto out;
}
if (!fte_tmp->node.active) {
tree_put_node(&fte_tmp->node, false);
fte_tmp = NULL;
goto out;
}
nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
out:
if (take_write)
up_write_ref_node(&g->node, false);
else
up_read_ref_node(&g->node);
return fte_tmp;
}
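/* Optimistic insertion path: first search the matched groups for an
 * FTE with an identical match value and append to it; if none is found
 * (or FLOW_ACT_NO_APPEND is set), try to insert a fresh FTE into any
 * matching group. The table and group version counters are re-checked
 * to detect writers that ran while the groups were unlocked, and
 * -EAGAIN tells the caller to retry under the table write lock.
 */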
static struct mlx5_flow_handle *
try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct list_head *match_head,
const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
int ft_version)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
struct mlx5_flow_group *g;
struct mlx5_flow_handle *rule;
struct match_list *iter;
bool take_write = false;
struct fs_fte *fte;
u64 version;
int err;
fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte))
return ERR_PTR(-ENOMEM);
search_again_locked:
version = matched_fgs_get_version(match_head);
if (flow_act->flags & FLOW_ACT_NO_APPEND)
goto skip_search;
/* Try to find a fg that already contains a matching fte */
list_for_each_entry(iter, match_head, list) {
struct fs_fte *fte_tmp;
g = iter->g;
fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
if (!fte_tmp)
continue;
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
up_write_ref_node(&fte_tmp->node, false);
tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
return rule;
}
skip_search:
/* No group with matching fte found, or we skipped the search.
* Try to add a new fte to any matching fg.
*/
/* Check the ft version, in case a new flow group
 * was added while the fgs weren't locked
 */
if (atomic_read(&ft->node.version) != ft_version) {
rule = ERR_PTR(-EAGAIN);
goto out;
}
/* Check the fgs version, in case an FTE with the
 * same match value was added while the fgs weren't locked
 */
if (version != matched_fgs_get_version(match_head)) {
take_write = true;
goto search_again_locked;
}
list_for_each_entry(iter, match_head, list) {
g = iter->g;
if (!g->node.active)
continue;
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
err = insert_fte(g, fte);
if (err) {
up_write_ref_node(&g->node, false);
if (err == -ENOSPC)
continue;
kmem_cache_free(steering->ftes_cache, fte);
return ERR_PTR(err);
}
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
return rule;
}
rule = ERR_PTR(-ENOENT);
out:
kmem_cache_free(steering->ftes_cache, fte);
return rule;
}
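/* Core add-rule flow: start with the table read-locked, build the list
 * of candidate groups and try the existing-group fast path. On -ENOENT
 * or -EAGAIN, upgrade to the table write lock, re-validate the table
 * version, and finally create a new autogrouped flow group plus FTE
 * for the rule.
 */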
static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
struct mlx5_flow_group *g;
struct mlx5_flow_handle *rule;
struct match_list_head match_head;
bool take_write = false;
struct fs_fte *fte;
int version;
int err;
int i;
if (!check_valid_spec(spec))
return ERR_PTR(-EINVAL);
for (i = 0; i < dest_num; i++) {
if (!dest_is_valid(&dest[i], flow_act->action, ft))
return ERR_PTR(-EINVAL);
}
nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
search_again_locked:
version = atomic_read(&ft->node.version);
/* Collect all fgs which have a matching match_criteria */
err = build_match_list(&match_head, ft, spec, take_write);
if (err) {
if (take_write)
up_write_ref_node(&ft->node, false);
else
up_read_ref_node(&ft->node);
return ERR_PTR(err);
}
if (!take_write)
up_read_ref_node(&ft->node);
rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
dest_num, version);
free_match_list(&match_head, take_write);
if (!IS_ERR(rule) ||
(PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
if (take_write)
up_write_ref_node(&ft->node, false);
return rule;
}
if (!take_write) {
nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
take_write = true;
}
if (PTR_ERR(rule) == -EAGAIN ||
version != atomic_read(&ft->node.version))
goto search_again_locked;
g = alloc_auto_flow_group(ft, spec);
if (IS_ERR(g)) {
rule = ERR_CAST(g);
up_write_ref_node(&ft->node, false);
return rule;
}
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
up_write_ref_node(&ft->node, false);
err = create_auto_flow_group(ft, g);
if (err)
goto err_release_fg;
fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte)) {
err = PTR_ERR(fte);
goto err_release_fg;
}
err = insert_fte(g, fte);
if (err) {
kmem_cache_free(steering->ftes_cache, fte);
goto err_release_fg;
}
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&g->node, false);
return rule;
err_release_fg:
up_write_ref_node(&g->node, false);
tree_put_node(&g->node, false);
return ERR_PTR(err);
}
static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
{
return ((ft->type == FS_FT_NIC_RX) &&
(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
}
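/* Public entry point. MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO is
 * emulated by rewriting the action into FWD_DEST targeting the next
 * chained flow table, under root->chain_lock so the chain cannot move
 * underneath us.
 *
 * Minimal caller sketch (illustrative only; "my_ft", "my_spec" and the
 * single destination are assumptions, not taken from this file):
 *
 *	handle = mlx5_add_flow_rules(my_ft, my_spec, &flow_act, &dest, 1);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	...
 *	mlx5_del_flow_rules(handle);
 */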
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int num_dest)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
struct mlx5_flow_destination gen_dest = {};
struct mlx5_flow_table *next_ft = NULL;
struct mlx5_flow_handle *handle = NULL;
u32 sw_action = flow_act->action;
struct fs_prio *prio;
fs_get_obj(prio, ft->node.parent);
if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
if (!fwd_next_prio_supported(ft))
return ERR_PTR(-EOPNOTSUPP);
if (num_dest)
return ERR_PTR(-EINVAL);
mutex_lock(&root->chain_lock);
next_ft = find_next_chained_ft(prio);
if (next_ft) {
gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
gen_dest.ft = next_ft;
dest = &gen_dest;
num_dest = 1;
flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else {
mutex_unlock(&root->chain_lock);
return ERR_PTR(-EOPNOTSUPP);
}
}
handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
if (!IS_ERR_OR_NULL(handle) &&
(list_empty(&handle->rule[0]->next_ft))) {
mutex_lock(&next_ft->lock);
list_add(&handle->rule[0]->next_ft,
&next_ft->fwd_rules);
mutex_unlock(&next_ft->lock);
handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
mutex_unlock(&root->chain_lock);
}
return handle;
}
EXPORT_SYMBOL(mlx5_add_flow_rules);
void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
{
struct fs_fte *fte;
int i;
/* In order to consolidate the HW changes we lock the FTE for other
 * changes and increase its refcount so that its "del" functions are
 * not invoked implicitly; we handle them here instead.
 * The rules are removed under the locked FTE.
 * After removing all the handle's rules, if rules remain, we only
 * need to modify the FTE in FW and then unlock and drop the refcount
 * we took above.
 * Otherwise the FTE should be deleted: first delete the FTE in FW,
 * then unlock the FTE and call tree_put_node() on it, which performs
 * the final refcount decrease and the required handling of its
 * parent.
 */
fs_get_obj(fte, handle->rule[0]->node.parent);
down_write_ref_node(&fte->node, false);
for (i = handle->num_rules - 1; i >= 0; i--)
tree_remove_node(&handle->rule[i]->node, true);
if (fte->dests_size) {
if (fte->modify_mask)
modify_fte(fte);
up_write_ref_node(&fte->node, false);
} else if (list_empty(&fte->node.children)) {
del_hw_fte(&fte->node);
/* Avoid double call to del_hw_fte */
fte->node.del_hw_func = NULL;
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
}
kfree(handle);
}
EXPORT_SYMBOL(mlx5_del_flow_rules);
/* Assuming prio->node.children (flow tables) is sorted by level */
static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
{
struct fs_prio *prio;
fs_get_obj(prio, ft->node.parent);
if (!list_is_last(&ft->node.list, &prio->node.children))
return list_next_entry(ft, node.list);
return find_next_chained_ft(prio);
}
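/* If @ft is the current root flow table, promote the next chained
 * table to root, updating FW once per underlay QPN (or once with QPN 0
 * when the list is empty). A failure is only warned about and teardown
 * proceeds regardless, hence the unconditional return 0.
 */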
static int update_root_ft_destroy(struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
struct mlx5_ft_underlay_qp *uqp;
struct mlx5_flow_table *new_root_ft = NULL;
int err = 0;
u32 qpn;
if (root->root_ft != ft)
return 0;
new_root_ft = find_next_ft(ft);
if (!new_root_ft) {
root->root_ft = NULL;
return 0;
}
if (list_empty(&root->underlay_qpns)) {
/* Don't set any QPN (use zero) when the QPN list is empty */
qpn = 0;
err = root->cmds->update_root_ft(root, new_root_ft,
qpn, false);
} else {
list_for_each_entry(uqp, &root->underlay_qpns, list) {
qpn = uqp->qpn;
err = root->cmds->update_root_ft(root,
new_root_ft, qpn,
false);
if (err)
break;
}
}
if (err)
mlx5_core_warn(root->dev,
"Update root flow table of id(%u) qpn(%d) failed\n",
ft->id, qpn);
else
root->root_ft = new_root_ft;
return 0;
}
/* Disconnect a flow table: connect the flow tables from the
 * previous priority to the next flow table in the chain.
 */
static int disconnect_flow_table(struct mlx5_flow_table *ft)
{
struct mlx5_core_dev *dev = get_dev(&ft->node);
struct mlx5_flow_table *next_ft;
struct fs_prio *prio;
int err = 0;
err = update_root_ft_destroy(ft);
if (err)
return err;
fs_get_obj(prio, ft->node.parent);
if (!(list_first_entry(&prio->node.children,
struct mlx5_flow_table,
node.list) == ft))
return 0;
next_ft = find_next_chained_ft(prio);
err = connect_fwd_rules(dev, next_ft, ft);
if (err)
return err;
err = connect_prev_fts(dev, next_ft, prio);
if (err)
mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
ft->id);
return err;
}
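/* Tear down a flow table: splice it out of the priority chain under
 * root->chain_lock, then drop the tree reference. A refcount > 1 at
 * this point means the table is still referenced elsewhere and is only
 * freed later, hence the warning.
 */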
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
int err = 0;
mutex_lock(&root->chain_lock);
err = disconnect_flow_table(ft);
if (err) {
mutex_unlock(&root->chain_lock);
return err;
}
if (tree_remove_node(&ft->node, false))
mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
ft->id);
mutex_unlock(&root->chain_lock);
return err;
}
EXPORT_SYMBOL(mlx5_destroy_flow_table);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
if (tree_remove_node(&fg->node, false))
mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
fg->id);
}
struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
int n)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
if (!steering || !steering->fdb_sub_ns)
return NULL;
return steering->fdb_sub_ns[n];
}
EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
struct mlx5_flow_root_namespace *root_ns;
int prio = 0;
struct fs_prio *fs_prio;
struct mlx5_flow_namespace *ns;
if (!steering)
return NULL;
switch (type) {
case MLX5_FLOW_NAMESPACE_FDB:
if (steering->fdb_root_ns)
return &steering->fdb_root_ns->ns;
return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
if (steering->sniffer_rx_root_ns)
return &steering->sniffer_rx_root_ns->ns;
return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
if (steering->sniffer_tx_root_ns)
return &steering->sniffer_tx_root_ns->ns;
return NULL;
default:
break;
}
if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
root_ns = steering->egress_root_ns;
} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_BYPASS_PRIO;
} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_KERNEL_PRIO;
} else { /* Must be NIC RX */
root_ns = steering->root_ns;
prio = type;
}
if (!root_ns)
return NULL;
fs_prio = find_prio(&root_ns->ns, prio);
if (!fs_prio)
return NULL;
ns = list_first_entry(&fs_prio->node.children,
typeof(*ns),
node.list);
return ns;
}
EXPORT_SYMBOL(mlx5_get_flow_namespace);
struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type,
int vport)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
return NULL;
switch (type) {
case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
if (steering->esw_egress_root_ns &&
steering->esw_egress_root_ns[vport])
return &steering->esw_egress_root_ns[vport]->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
if (steering->esw_ingress_root_ns &&
steering->esw_ingress_root_ns[vport])
return &steering->esw_ingress_root_ns[vport]->ns;
else
return NULL;
default:
return NULL;
}
}
static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
unsigned int prio,
int num_levels,
enum fs_node_type type)
{
struct fs_prio *fs_prio;
fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
if (!fs_prio)
return ERR_PTR(-ENOMEM);
fs_prio->node.type = type;
tree_init_node(&fs_prio->node, NULL, del_sw_prio);
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->num_levels = num_levels;
fs_prio->prio = prio;
list_add_tail(&fs_prio->node.list, &ns->node.children);
return fs_prio;
}
static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
unsigned int prio,
int num_levels)
{
return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
}
static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
unsigned int prio, int num_levels)
{
return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
}
static struct mlx5_flow_namespace *
fs_init_namespace(struct mlx5_flow_namespace *ns)
{
ns->node.type = FS_TYPE_NAMESPACE;
return ns;
}
static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
int def_miss_act)
{
struct mlx5_flow_namespace *ns;
ns = kzalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
return ERR_PTR(-ENOMEM);
fs_init_namespace(ns);
ns->def_miss_action = def_miss_act;
tree_init_node(&ns->node, NULL, del_sw_ns);
tree_add_node(&ns->node, &prio->node);
list_add_tail(&ns->node.list, &prio->node.children);
return ns;
}
static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
struct init_tree_node *prio_metadata)
{
struct fs_prio *fs_prio;
int i;
for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
if (IS_ERR(fs_prio))
return PTR_ERR(fs_prio);
}
return 0;
}
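
/* Extract a single capability bit from the current HCA flow table caps.
 * @offset is a bit offset (from FS_CAP()) into the big-endian capability
 * area; the containing 32-bit word is byte-swapped and the bit is counted
 * from the most significant end of that word.
 */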
#define FLOW_TABLE_BIT_SZ 1
#define GET_FLOW_TABLE_CAP(dev, offset) \
((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \
offset / 32)) >> \
(32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
{
int i;
for (i = 0; i < caps->arr_sz; i++) {
if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
return false;
}
return true;
}
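
/* Build the software priority/namespace tree from a static init_tree_node
 * template.  Priorities whose minimum flow table level or capability
 * requirements are not met by the device are silently skipped; a priority
 * with num_leaf_prios expands into that many consecutive leaf priorities.
 */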
static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
struct init_tree_node *init_node,
struct fs_node *fs_parent_node,
struct init_tree_node *init_parent_node,
int prio)
{
int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
flow_table_properties_nic_receive.
max_ft_level);
struct mlx5_flow_namespace *fs_ns;
struct fs_prio *fs_prio;
struct fs_node *base;
int i;
int err;
if (init_node->type == FS_TYPE_PRIO) {
if ((init_node->min_ft_level > max_ft_level) ||
!has_required_caps(steering->dev, &init_node->caps))
return 0;
fs_get_obj(fs_ns, fs_parent_node);
if (init_node->num_leaf_prios)
return create_leaf_prios(fs_ns, prio, init_node);
fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
if (IS_ERR(fs_prio))
return PTR_ERR(fs_prio);
base = &fs_prio->node;
} else if (init_node->type == FS_TYPE_NAMESPACE) {
fs_get_obj(fs_prio, fs_parent_node);
fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action);
if (IS_ERR(fs_ns))
return PTR_ERR(fs_ns);
base = &fs_ns->node;
} else {
return -EINVAL;
}
prio = 0;
for (i = 0; i < init_node->ar_size; i++) {
err = init_root_tree_recursive(steering, &init_node->children[i],
base, init_node, prio);
if (err)
return err;
if (init_node->children[i].type == FS_TYPE_PRIO &&
init_node->children[i].num_leaf_prios) {
prio += init_node->children[i].num_leaf_prios;
}
}
return 0;
}
static int init_root_tree(struct mlx5_flow_steering *steering,
struct init_tree_node *init_node,
struct fs_node *fs_parent_node)
{
int i;
struct mlx5_flow_namespace *fs_ns;
int err;
fs_get_obj(fs_ns, fs_parent_node);
for (i = 0; i < init_node->ar_size; i++) {
err = init_root_tree_recursive(steering, &init_node->children[i],
&fs_ns->node,
init_node, i);
if (err)
return err;
}
return 0;
}
static struct mlx5_flow_root_namespace
*create_root_ns(struct mlx5_flow_steering *steering,
enum fs_flow_table_type table_type)
{
const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_namespace *ns;
if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
(table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
/* Create the root namespace */
root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
return NULL;
root_ns->dev = steering->dev;
root_ns->table_type = table_type;
root_ns->cmds = cmds;
INIT_LIST_HEAD(&root_ns->underlay_qpns);
ns = &root_ns->ns;
fs_init_namespace(ns);
mutex_init(&root_ns->chain_lock);
tree_init_node(&ns->node, NULL, NULL);
tree_add_node(&ns->node, NULL);
return root_ns;
}
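
/* The set_prio_attrs*() helpers walk the tree and convert the per-node
 * num_levels into absolute start_level values, accumulating levels across
 * sibling priorities.  A priority created with num_levels == 0 takes the
 * total number of levels of its descendants.
 */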
static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
{
struct fs_prio *prio;
fs_for_each_prio(prio, ns) {
/* This updates prio start_level and num_levels */
set_prio_attrs_in_prio(prio, acc_level);
acc_level += prio->num_levels;
}
return acc_level;
}
static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
{
struct mlx5_flow_namespace *ns;
int acc_level_ns = acc_level;
prio->start_level = acc_level;
fs_for_each_ns(ns, prio)
/* This updates start_level and num_levels of ns's priority descendants */
acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
if (!prio->num_levels)
prio->num_levels = acc_level_ns - prio->start_level;
WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
}
static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
{
struct mlx5_flow_namespace *ns = &root_ns->ns;
struct fs_prio *prio;
int start_level = 0;
fs_for_each_prio(prio, ns) {
set_prio_attrs_in_prio(prio, start_level);
start_level += prio->num_levels;
}
}
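
/* The anchor is an empty, single-entry table created at the lowest
 * priority of the NIC RX namespace; it terminates the table chain so
 * every other table always has a valid table to forward misses to.
 */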
#define ANCHOR_PRIO 0
#define ANCHOR_SIZE 1
#define ANCHOR_LEVEL 0
static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
{
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
if (WARN_ON(!ns))
return -EINVAL;
ft_attr.max_fte = ANCHOR_SIZE;
ft_attr.level = ANCHOR_LEVEL;
ft_attr.prio = ANCHOR_PRIO;
ft = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft)) {
		mlx5_core_err(steering->dev, "Failed to create last anchor flow table\n");
return PTR_ERR(ft);
}
return 0;
}
static int init_root_ns(struct mlx5_flow_steering *steering)
{
int err;
steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
if (!steering->root_ns)
return -ENOMEM;
err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
if (err)
goto out_err;
set_prio_attrs(steering->root_ns);
err = create_anchor_flow_table(steering);
if (err)
goto out_err;
return 0;
out_err:
cleanup_root_ns(steering->root_ns);
steering->root_ns = NULL;
return err;
}
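
/* Depth-first teardown of a software tree.  A reference is taken so the
 * node outlives the removal of its children, then released before the
 * node itself is removed.
 */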
static void clean_tree(struct fs_node *node)
{
if (node) {
struct fs_node *iter;
struct fs_node *temp;
tree_get_node(node);
list_for_each_entry_safe(iter, temp, &node->children, list)
clean_tree(iter);
tree_put_node(node, false);
tree_remove_node(node, false);
}
}
static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
{
if (!root_ns)
return;
clean_tree(&root_ns->ns.node);
}
static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
int i;
if (!steering->esw_egress_root_ns)
return;
for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
steering->esw_egress_root_ns = NULL;
}
static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
int i;
if (!steering->esw_ingress_root_ns)
return;
for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
steering->esw_ingress_root_ns = NULL;
}
void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
cleanup_root_ns(steering->root_ns);
cleanup_egress_acls_root_ns(dev);
cleanup_ingress_acls_root_ns(dev);
cleanup_root_ns(steering->fdb_root_ns);
steering->fdb_root_ns = NULL;
kfree(steering->fdb_sub_ns);
steering->fdb_sub_ns = NULL;
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
cleanup_root_ns(steering->rdma_rx_root_ns);
cleanup_root_ns(steering->egress_root_ns);
mlx5_cleanup_fc_stats(dev);
kmem_cache_destroy(steering->ftes_cache);
kmem_cache_destroy(steering->fgs_cache);
kfree(steering);
}
static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;
steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
if (!steering->sniffer_tx_root_ns)
return -ENOMEM;
/* Create single prio */
prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
return PTR_ERR_OR_ZERO(prio);
}
static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;
steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
if (!steering->sniffer_rx_root_ns)
return -ENOMEM;
/* Create single prio */
prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
return PTR_ERR_OR_ZERO(prio);
}
static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
{
int err;
steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX);
if (!steering->rdma_rx_root_ns)
return -ENOMEM;
err = init_root_tree(steering, &rdma_rx_root_fs,
&steering->rdma_rx_root_ns->ns.node);
if (err)
goto out_err;
set_prio_attrs(steering->rdma_rx_root_ns);
return 0;
out_err:
cleanup_root_ns(steering->rdma_rx_root_ns);
steering->rdma_rx_root_ns = NULL;
return err;
}
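
/* FDB layout: a single-level bypass priority, a chained fast-path priority
 * holding FDB_MAX_CHAIN + 1 sub-namespaces (each with its own set of
 * two-level priorities), and a single-level slow-path priority.
 */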
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
struct mlx5_flow_namespace *ns;
struct fs_prio *maj_prio;
struct fs_prio *min_prio;
int levels;
int chain;
int prio;
int err;
steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
if (!steering->fdb_root_ns)
return -ENOMEM;
	steering->fdb_sub_ns = kcalloc(FDB_MAX_CHAIN + 1,
				       sizeof(*steering->fdb_sub_ns),
				       GFP_KERNEL);
if (!steering->fdb_sub_ns)
return -ENOMEM;
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
1);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
FDB_FAST_PATH,
levels);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
if (IS_ERR(ns)) {
err = PTR_ERR(ns);
goto out_err;
}
for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
min_prio = fs_create_prio(ns, prio, 2);
if (IS_ERR(min_prio)) {
err = PTR_ERR(min_prio);
goto out_err;
}
}
steering->fdb_sub_ns[chain] = ns;
}
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
set_prio_attrs(steering->fdb_root_ns);
return 0;
out_err:
cleanup_root_ns(steering->fdb_root_ns);
kfree(steering->fdb_sub_ns);
steering->fdb_sub_ns = NULL;
steering->fdb_root_ns = NULL;
return err;
}
static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
{
struct fs_prio *prio;
steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
if (!steering->esw_egress_root_ns[vport])
return -ENOMEM;
	/* Create single prio */
prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
return PTR_ERR_OR_ZERO(prio);
}
static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
{
struct fs_prio *prio;
steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
if (!steering->esw_ingress_root_ns[vport])
return -ENOMEM;
	/* Create single prio */
prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
return PTR_ERR_OR_ZERO(prio);
}
static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
steering->esw_egress_root_ns =
kcalloc(total_vports,
sizeof(*steering->esw_egress_root_ns),
GFP_KERNEL);
if (!steering->esw_egress_root_ns)
return -ENOMEM;
for (i = 0; i < total_vports; i++) {
err = init_egress_acl_root_ns(steering, i);
if (err)
goto cleanup_root_ns;
}
return 0;
cleanup_root_ns:
for (i--; i >= 0; i--)
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
steering->esw_egress_root_ns = NULL;
return err;
}
static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
steering->esw_ingress_root_ns =
kcalloc(total_vports,
sizeof(*steering->esw_ingress_root_ns),
GFP_KERNEL);
if (!steering->esw_ingress_root_ns)
return -ENOMEM;
for (i = 0; i < total_vports; i++) {
err = init_ingress_acl_root_ns(steering, i);
if (err)
goto cleanup_root_ns;
}
return 0;
cleanup_root_ns:
for (i--; i >= 0; i--)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
steering->esw_ingress_root_ns = NULL;
return err;
}
static int init_egress_root_ns(struct mlx5_flow_steering *steering)
{
int err;
steering->egress_root_ns = create_root_ns(steering,
FS_FT_NIC_TX);
if (!steering->egress_root_ns)
return -ENOMEM;
err = init_root_tree(steering, &egress_root_fs,
&steering->egress_root_ns->ns.node);
if (err)
goto cleanup;
set_prio_attrs(steering->egress_root_ns);
return 0;
cleanup:
cleanup_root_ns(steering->egress_root_ns);
steering->egress_root_ns = NULL;
return err;
}
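
/* Top-level flow steering init: allocate the steering context and caches,
 * then create each root namespace the device capabilities allow (NIC RX,
 * FDB and eswitch ACLs, sniffer RX/TX, RDMA RX, NIC TX/egress).
 */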
int mlx5_init_fs(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering;
int err = 0;
err = mlx5_init_fc_stats(dev);
if (err)
return err;
steering = kzalloc(sizeof(*steering), GFP_KERNEL);
if (!steering)
return -ENOMEM;
steering->dev = dev;
dev->priv.steering = steering;
steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
sizeof(struct mlx5_flow_group), 0,
0, NULL);
steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
0, NULL);
if (!steering->ftes_cache || !steering->fgs_cache) {
err = -ENOMEM;
goto err;
}
if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
err = init_root_ns(steering);
if (err)
goto err;
}
if (MLX5_ESWITCH_MANAGER(dev)) {
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
err = init_fdb_root_ns(steering);
if (err)
goto err;
}
if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
err = init_egress_acls_root_ns(dev);
if (err)
goto err;
}
if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
err = init_ingress_acls_root_ns(dev);
if (err)
goto err;
}
}
if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
err = init_sniffer_rx_root_ns(steering);
if (err)
goto err;
}
if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
err = init_sniffer_tx_root_ns(steering);
if (err)
goto err;
}
if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
err = init_rdma_rx_root_ns(steering);
if (err)
goto err;
}
if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
err = init_egress_root_ns(steering);
if (err)
goto err;
}
return 0;
err:
mlx5_cleanup_fs(dev);
return err;
}
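
/* Register an additional underlay QPN on the NIC RX root flow table
 * (e.g. for an IPoIB underlay QP) and track it so it can be removed later.
 */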
int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
{
struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
struct mlx5_ft_underlay_qp *new_uqp;
int err = 0;
new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
if (!new_uqp)
return -ENOMEM;
mutex_lock(&root->chain_lock);
if (!root->root_ft) {
err = -EINVAL;
goto update_ft_fail;
}
err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
false);
if (err) {
mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
underlay_qpn, err);
goto update_ft_fail;
}
new_uqp->qpn = underlay_qpn;
list_add_tail(&new_uqp->list, &root->underlay_qpns);
mutex_unlock(&root->chain_lock);
return 0;
update_ft_fail:
mutex_unlock(&root->chain_lock);
kfree(new_uqp);
return err;
}
EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
{
struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
struct mlx5_ft_underlay_qp *uqp;
bool found = false;
int err = 0;
mutex_lock(&root->chain_lock);
list_for_each_entry(uqp, &root->underlay_qpns, list) {
if (uqp->qpn == underlay_qpn) {
found = true;
break;
}
}
if (!found) {
mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
underlay_qpn);
err = -EINVAL;
goto out;
}
err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
true);
if (err)
mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
underlay_qpn, err);
list_del(&uqp->list);
mutex_unlock(&root->chain_lock);
kfree(uqp);
return 0;
out:
mutex_unlock(&root->chain_lock);
return err;
}
EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
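
/* Map a namespace type to its root namespace.  Vport ACL types are
 * resolved through vport 0; everything else goes through
 * mlx5_get_flow_namespace().
 */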
static struct mlx5_flow_root_namespace
*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
{
struct mlx5_flow_namespace *ns;
if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
else
ns = mlx5_get_flow_namespace(dev, ns_type);
if (!ns)
return NULL;
return find_root(&ns->node);
}
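
/* Allocate a modify-header context through the command layer bound to the
 * namespace (firmware or software steering).  The returned handle is later
 * attached to a flow action.
 */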
struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
u8 ns_type, u8 num_actions,
void *modify_actions)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_modify_hdr *modify_hdr;
int err;
root = get_root_namespace(dev, ns_type);
if (!root)
return ERR_PTR(-EOPNOTSUPP);
modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
if (!modify_hdr)
return ERR_PTR(-ENOMEM);
modify_hdr->ns_type = ns_type;
err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
modify_actions, modify_hdr);
if (err) {
kfree(modify_hdr);
return ERR_PTR(err);
}
return modify_hdr;
}
EXPORT_SYMBOL(mlx5_modify_header_alloc);
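
/* Usage sketch (hypothetical caller; the field choice, values and error
 * handling here are illustrative only):
 *
 *	u8 action[MLX5_ST_SZ_BYTES(set_action_in)] = {};
 *	struct mlx5_modify_hdr *mh;
 *
 *	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
 *	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
 *	MLX5_SET(set_action_in, action, length, 8);
 *	MLX5_SET(set_action_in, action, data, 64);
 *	mh = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL, 1, action);
 *	if (IS_ERR(mh))
 *		return PTR_ERR(mh);
 */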
void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
struct mlx5_modify_hdr *modify_hdr)
{
struct mlx5_flow_root_namespace *root;
root = get_root_namespace(dev, modify_hdr->ns_type);
if (WARN_ON(!root))
return;
root->cmds->modify_header_dealloc(root, modify_hdr);
kfree(modify_hdr);
}
EXPORT_SYMBOL(mlx5_modify_header_dealloc);
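
/* Allocate a packet reformat (encap/decap) context, e.g.
 * MLX5_REFORMAT_TYPE_L2_TO_VXLAN with the encapsulation headers passed in
 * @reformat_data, through the namespace's command layer.
 */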
struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
int reformat_type,
size_t size,
void *reformat_data,
enum mlx5_flow_namespace_type ns_type)
{
struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_flow_root_namespace *root;
int err;
root = get_root_namespace(dev, ns_type);
if (!root)
return ERR_PTR(-EOPNOTSUPP);
pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
if (!pkt_reformat)
return ERR_PTR(-ENOMEM);
pkt_reformat->ns_type = ns_type;
pkt_reformat->reformat_type = reformat_type;
err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
reformat_data, ns_type,
pkt_reformat);
if (err) {
kfree(pkt_reformat);
return ERR_PTR(err);
}
return pkt_reformat;
}
EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
struct mlx5_pkt_reformat *pkt_reformat)
{
struct mlx5_flow_root_namespace *root;
root = get_root_namespace(dev, pkt_reformat->ns_type);
if (WARN_ON(!root))
return;
root->cmds->packet_reformat_dealloc(root, pkt_reformat);
kfree(pkt_reformat);
}
EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
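
/* Pair @ns with @peer_ns (or unpair when @peer_ns is NULL); both sides
 * must use the same steering mode.  Used when two devices' eswitches
 * operate as peers, e.g. under LAG.
 */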
int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns)
{
if (peer_ns && ns->mode != peer_ns->mode) {
		mlx5_core_err(ns->dev,
			      "Can't peer namespaces with different steering modes\n");
return -EINVAL;
}
return ns->cmds->set_peer(ns, peer_ns);
}
/* This function should be called only at init stage of the namespace.
* It is not safe to call this function while steering operations
* are executed in the namespace.
*/
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
enum mlx5_flow_steering_mode mode)
{
struct mlx5_flow_root_namespace *root;
const struct mlx5_flow_cmds *cmds;
int err;
root = find_root(&ns->node);
if (&root->ns != ns)
		/* Can't set cmds on a non-root namespace */
return -EINVAL;
if (root->table_type != FS_FT_FDB)
return -EOPNOTSUPP;
if (root->mode == mode)
return 0;
if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
cmds = mlx5_fs_cmd_get_dr_cmds();
else
cmds = mlx5_fs_cmd_get_fw_cmds();
if (!cmds)
return -EOPNOTSUPP;
err = cmds->create_ns(root);
if (err) {
mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n",
err);
return err;
}
root->cmds->destroy_ns(root);
root->cmds = cmds;
root->mode = mode;
return 0;
}