Merge branch 'mlxsw-spectrum_router-Optimize-LPM-trees'

Jiri Pirko says:

====================
mlxsw: spectrum_router: Optimize LPM trees

Ido says:

This set tries to optimize the structure of the LPM trees used for route
lookup by avoiding lookups that are guaranteed not to return a result.
This is done by making sure only used prefix lengths are present in the
tree.

The first two patches are small preparatory steps towards the actual
change in the last patch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-01-22 09:22:11 -05:00
commit 2ae50dea1d

View file

@ -71,6 +71,7 @@
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;
@ -84,6 +85,8 @@ struct mlxsw_sp_router {
struct rhashtable nexthop_ht;
struct list_head nexthop_list;
struct {
/* One tree for each protocol: IPv4 and IPv6 */
struct mlxsw_sp_lpm_tree *proto_trees[2];
struct mlxsw_sp_lpm_tree *trees;
unsigned int tree_count;
} lpm;
@ -162,6 +165,15 @@ struct mlxsw_sp_rif_ops {
struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
/* Forward declarations of the LPM tree reference and bind/unbind helpers.
 * mlxsw_sp_fib_create() and mlxsw_sp_fib_destroy() call them; presumably
 * their definitions appear further down in the file - confirm.
 */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_fib *fib,
u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_fib *fib);
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir)
@ -349,14 +361,6 @@ mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
/* Return true when @prefix_usage compares equal, via
 * mlxsw_sp_prefix_usage_eq(), to a zero-initialized prefix usage.
 */
static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
/* Zero-initialized reference value to compare against. */
struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
struct mlxsw_sp_prefix_usage *prefix_usage2)
@ -398,7 +402,6 @@ enum mlxsw_sp_fib_entry_type {
};
struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib;
struct mlxsw_sp_fib_node {
struct list_head entry_list;
@ -445,6 +448,7 @@ struct mlxsw_sp_lpm_tree {
u8 id; /* tree ID */
unsigned int ref_count;
enum mlxsw_sp_l3proto proto;
unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
struct mlxsw_sp_prefix_usage prefix_usage;
};
@ -453,8 +457,6 @@ struct mlxsw_sp_fib {
struct list_head node_list;
struct mlxsw_sp_vr *vr;
struct mlxsw_sp_lpm_tree *lpm_tree;
unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
struct mlxsw_sp_prefix_usage prefix_usage;
enum mlxsw_sp_l3proto proto;
};
@ -469,12 +471,15 @@ struct mlxsw_sp_vr {
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_vr *vr,
enum mlxsw_sp_l3proto proto)
{
struct mlxsw_sp_lpm_tree *lpm_tree;
struct mlxsw_sp_fib *fib;
int err;
lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
fib = kzalloc(sizeof(*fib), GFP_KERNEL);
if (!fib)
return ERR_PTR(-ENOMEM);
@ -484,17 +489,26 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
INIT_LIST_HEAD(&fib->node_list);
fib->proto = proto;
fib->vr = vr;
fib->lpm_tree = lpm_tree;
mlxsw_sp_lpm_tree_hold(lpm_tree);
err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
if (err)
goto err_lpm_tree_bind;
return fib;
err_lpm_tree_bind:
mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
kfree(fib);
return ERR_PTR(err);
}
static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib *fib)
{
mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
WARN_ON(!list_empty(&fib->node_list));
WARN_ON(fib->lpm_tree);
rhashtable_destroy(&fib->ht);
kfree(fib);
}
@ -581,6 +595,9 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
goto err_left_struct_set;
memcpy(&lpm_tree->prefix_usage, prefix_usage,
sizeof(lpm_tree->prefix_usage));
memset(&lpm_tree->prefix_ref_count, 0,
sizeof(lpm_tree->prefix_ref_count));
lpm_tree->ref_count = 1;
return lpm_tree;
err_left_struct_set:
@ -607,8 +624,10 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
if (lpm_tree->ref_count != 0 &&
lpm_tree->proto == proto &&
mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
prefix_usage))
prefix_usage)) {
mlxsw_sp_lpm_tree_hold(lpm_tree);
return lpm_tree;
}
}
return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}
@ -629,9 +648,10 @@ static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
struct mlxsw_sp_lpm_tree *lpm_tree;
u64 max_trees;
int i;
int err, i;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
return -EIO;
@ -649,11 +669,42 @@ static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
}
lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
MLXSW_SP_L3_PROTO_IPV4);
if (IS_ERR(lpm_tree)) {
err = PTR_ERR(lpm_tree);
goto err_ipv4_tree_get;
}
mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
MLXSW_SP_L3_PROTO_IPV6);
if (IS_ERR(lpm_tree)) {
err = PTR_ERR(lpm_tree);
goto err_ipv6_tree_get;
}
mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
return 0;
err_ipv6_tree_get:
lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
kfree(mlxsw_sp->router->lpm.trees);
return err;
}
/* Tear down the router's LPM state: drop the references held on the
 * per-protocol trees stored in lpm.proto_trees[] and free the lpm.trees
 * array.
 */
static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_lpm_tree *lpm_tree;
/* IPv6 is put before IPv4, the reverse of the order in which
 * mlxsw_sp_lpm_init() obtains the two trees.
 */
lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
kfree(mlxsw_sp->router->lpm.trees);
}
@ -745,10 +796,10 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
return ERR_PTR(-EBUSY);
}
vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
if (IS_ERR(vr->fib4))
return ERR_CAST(vr->fib4);
vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
if (IS_ERR(vr->fib6)) {
err = PTR_ERR(vr->fib6);
goto err_fib6_create;
@ -763,21 +814,22 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
return vr;
err_mr_table_create:
mlxsw_sp_fib_destroy(vr->fib6);
mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
vr->fib6 = NULL;
err_fib6_create:
mlxsw_sp_fib_destroy(vr->fib4);
mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
vr->fib4 = NULL;
return ERR_PTR(err);
}
static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_vr *vr)
{
mlxsw_sp_mr_table_destroy(vr->mr4_table);
vr->mr4_table = NULL;
mlxsw_sp_fib_destroy(vr->fib6);
mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
vr->fib6 = NULL;
mlxsw_sp_fib_destroy(vr->fib4);
mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
vr->fib4 = NULL;
}
@ -793,12 +845,12 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
return vr;
}
static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
list_empty(&vr->fib6->node_list) &&
mlxsw_sp_mr_table_empty(vr->mr4_table))
mlxsw_sp_vr_destroy(vr);
mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
static bool
@ -809,7 +861,7 @@ mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
if (!mlxsw_sp_vr_is_used(vr))
return false;
if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
if (fib->lpm_tree->id == tree_id)
return true;
return false;
}
@ -839,14 +891,13 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib *fib,
struct mlxsw_sp_lpm_tree *new_tree)
{
struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
enum mlxsw_sp_l3proto proto = fib->proto;
struct mlxsw_sp_lpm_tree *old_tree;
u8 old_id, new_id = new_tree->id;
struct mlxsw_sp_vr *vr;
int i, err;
if (!old_tree)
goto no_replace;
old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
old_id = old_tree->id;
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
@ -860,6 +911,11 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
goto err_tree_replace;
}
memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
sizeof(new_tree->prefix_ref_count));
mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
return 0;
err_tree_replace:
@ -871,36 +927,6 @@ err_tree_replace:
old_tree);
}
return err;
no_replace:
fib->lpm_tree = new_tree;
mlxsw_sp_lpm_tree_hold(new_tree);
err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
if (err) {
mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
fib->lpm_tree = NULL;
return err;
}
return 0;
}
/* Accumulate into @req_prefix_usage every prefix length that is set in the
 * @proto FIB of each virtual router currently in use. The function only
 * calls mlxsw_sp_prefix_usage_set() on @req_prefix_usage, so the caller is
 * expected to pass in the starting value it wants to add to.
 */
static void
mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_l3proto proto,
struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
int i;
/* Walk all MAX_VRS virtual router slots of the device. */
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
unsigned char prefix;
/* Unused virtual routers contribute no prefix lengths. */
if (!mlxsw_sp_vr_is_used(vr))
continue;
mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
}
}
static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
@ -4201,68 +4227,66 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
}
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib *fib,
struct mlxsw_sp_fib_node *fib_node)
{
struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
struct mlxsw_sp_prefix_usage req_prefix_usage;
struct mlxsw_sp_fib *fib = fib_node->fib;
struct mlxsw_sp_lpm_tree *lpm_tree;
int err;
/* Since the tree is shared between all virtual routers we must
* make sure it contains all the required prefix lengths. This
* can be computed by either adding the new prefix length to the
* existing prefix usage of a bound tree, or by aggregating the
* prefix lengths across all virtual routers and adding the new
* one as well.
*/
if (fib->lpm_tree)
mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
&fib->lpm_tree->prefix_usage);
else
mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
goto out;
mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
fib->proto);
if (IS_ERR(lpm_tree))
return PTR_ERR(lpm_tree);
if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
return 0;
err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
if (err)
return err;
goto err_lpm_tree_replace;
out:
lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
return 0;
err_lpm_tree_replace:
mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
return err;
}
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib *fib)
struct mlxsw_sp_fib_node *fib_node)
{
if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
struct mlxsw_sp_prefix_usage req_prefix_usage;
struct mlxsw_sp_fib *fib = fib_node->fib;
int err;
if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
return;
/* Try to construct a new LPM tree from the current prefix usage
* minus the unused one. If we fail, continue using the old one.
*/
mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
fib_node->key.prefix_len);
lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
fib->proto);
if (IS_ERR(lpm_tree))
return;
mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
fib->lpm_tree = NULL;
}
static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
{
unsigned char prefix_len = fib_node->key.prefix_len;
struct mlxsw_sp_fib *fib = fib_node->fib;
err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
if (err)
goto err_lpm_tree_replace;
if (fib->prefix_ref_count[prefix_len]++ == 0)
mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
}
return;
static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
{
unsigned char prefix_len = fib_node->key.prefix_len;
struct mlxsw_sp_fib *fib = fib_node->fib;
if (--fib->prefix_ref_count[prefix_len] == 0)
mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
err_lpm_tree_replace:
mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
@ -4276,12 +4300,10 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
return err;
fib_node->fib = fib;
err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
if (err)
goto err_fib_lpm_tree_link;
mlxsw_sp_fib_node_prefix_inc(fib_node);
return 0;
err_fib_lpm_tree_link:
@ -4295,8 +4317,7 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_fib *fib = fib_node->fib;
mlxsw_sp_fib_node_prefix_dec(fib_node);
mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
fib_node->fib = NULL;
mlxsw_sp_fib_node_remove(fib, fib_node);
}
@ -4335,7 +4356,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
err_fib_node_init:
mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
mlxsw_sp_vr_put(vr);
mlxsw_sp_vr_put(mlxsw_sp, vr);
return ERR_PTR(err);
}
@ -4348,7 +4369,7 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
return;
mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
mlxsw_sp_fib_node_destroy(fib_node);
mlxsw_sp_vr_put(vr);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static struct mlxsw_sp_fib4_entry *
@ -5371,7 +5392,7 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
return;
mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
mlxsw_sp_vr_put(vr);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static int
@ -5408,7 +5429,7 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
return;
mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
mlxsw_sp_vr_put(vr);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
@ -6057,7 +6078,7 @@ err_fid_get:
err_rif_alloc:
err_rif_index_alloc:
vr->rif_count--;
mlxsw_sp_vr_put(vr);
mlxsw_sp_vr_put(mlxsw_sp, vr);
return ERR_PTR(err);
}
@ -6080,7 +6101,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
mlxsw_sp_fid_put(fid);
kfree(rif);
vr->rif_count--;
mlxsw_sp_vr_put(vr);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static void
@ -6870,7 +6891,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
return 0;
err_loopback_op:
mlxsw_sp_vr_put(ul_vr);
mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
return err;
}
@ -6884,7 +6905,7 @@ static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
--ul_vr->rif_count;
mlxsw_sp_vr_put(ul_vr);
mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {