1
0
Fork 0

IB/hfi1: Change default number of user contexts

Change the default number of user contexts to the number of real
(non-HT) cpu cores in order to reduce the division of hfi1 hardware
contexts in the case of high core counts with hyper-threading enabled.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
hifive-unleashed-5.1
Jubin John 2016-04-12 11:30:08 -07:00 committed by Doug Ledford
parent b218f786ad
commit 0852d241f4
3 changed files with 84 additions and 54 deletions

View File

@ -53,20 +53,6 @@
#include "sdma.h"
#include "trace.h"
/*
 * A CPU mask paired with bookkeeping state.
 * NOTE(review): how 'used' and 'gen' are consumed is not visible in this
 * excerpt; presumably 'used' marks CPUs already handed out from 'mask' and
 * 'gen' counts how often the set has been exhausted -- confirm against the
 * code that picks CPUs from a set.
 */
struct cpu_mask_set {
struct cpumask mask;
struct cpumask used;
/* zeroed by init_cpu_mask_set() */
uint gen;
};
/*
 * Affinity state for one device; allocated with kzalloc() and stored in
 * dd->affinity.
 */
struct hfi1_affinity {
/* default interrupt CPUs: device-local CPUs with HT siblings removed */
struct cpu_mask_set def_intr;
/*
 * receive interrupt CPUs.
 * NOTE(review): filled in by hfi1_dev_affinity_init(); the exact selection
 * is not visible in this excerpt.
 */
struct cpu_mask_set rcv_intr;
/* proc.mask is set to a copy of cpu_online_mask during init */
struct cpu_mask_set proc;
/* spin lock to protect affinity struct */
spinlock_t lock;
};
/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
"SDMA",
@ -82,6 +68,48 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
set->gen = 0;
}
/*
 * init_real_cpu_mask - allocate the affinity struct and build the mask of
 * "real" (non-hyper-threaded) CPU cores.
 *
 * A zeroed struct hfi1_affinity is allocated, its real_cpu_mask is seeded
 * from cpu_online_mask, the CPUs treated as HT siblings are stripped from
 * it, and the struct is published through dd->affinity.
 *
 * Return: 0 on success, -ENOMEM if the allocation fails (dd->affinity is
 * not written in that case).
 */
int init_real_cpu_mask(struct hfi1_devdata *dd)
{
	struct hfi1_affinity *aff;
	struct cpumask *real;
	int total, threads_per_core, keep, cpu, n;

	aff = kzalloc(sizeof(*aff), GFP_KERNEL);
	if (!aff)
		return -ENOMEM;

	real = &aff->real_cpu_mask;

	/* Every online CPU starts out as a candidate "real" core */
	cpumask_clear(real);
	cpumask_copy(real, cpu_online_mask);

	/*
	 * The sibling count of the first CPU is taken as the number of
	 * hardware threads per core for the whole system.
	 */
	total = cpumask_weight(real);
	cpu = cpumask_first(real);
	threads_per_core = cpumask_weight(topology_sibling_cpumask(cpu));
	keep = total / threads_per_core;

	/*
	 * Pass 1: walk past the first 'keep' CPUs; these are retained as the
	 * real cores. This relies on HT siblings not being numbered
	 * back-to-back (the single core case excepted).
	 */
	n = 0;
	while (n < keep) {
		cpu = cpumask_next(cpu, real);
		n++;
	}

	/*
	 * Pass 2: drop every remaining CPU from the mask. cpumask_next()
	 * steps over any holes in the numbering.
	 */
	while (n < total) {
		cpumask_clear_cpu(cpu, real);
		cpu = cpumask_next(cpu, real);
		n++;
	}

	dd->affinity = aff;
	return 0;
}
/*
* Interrupt affinity.
*
@ -93,20 +121,17 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
* to the node relative 1 as necessary.
*
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity *info;
struct hfi1_affinity *info = dd->affinity;
const struct cpumask *local_mask;
int curr_cpu, possible, i, ht;
int curr_cpu, possible, i;
if (node < 0)
node = numa_node_id();
dd->node = node;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
spin_lock_init(&info->lock);
init_cpu_mask_set(&info->def_intr);
@ -116,30 +141,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
/* use local mask as default */
cpumask_copy(&info->def_intr.mask, local_mask);
/*
* Remove HT cores from the default mask. Do this in two steps below.
*/
possible = cpumask_weight(&info->def_intr.mask);
ht = cpumask_weight(topology_sibling_cpumask(
cpumask_first(&info->def_intr.mask)));
/*
* Step 1. Skip over the first N HT siblings and use them as the
* "real" cores. Assumes that HT cores are not enumerated in
* succession (except in the single core case).
*/
curr_cpu = cpumask_first(&info->def_intr.mask);
for (i = 0; i < possible / ht; i++)
curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
/*
* Step 2. Remove the remaining HT siblings. Use cpumask_next() to
* skip any gaps.
*/
for (; i < possible; i++) {
cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
}
/* Use the "real" cpu mask of this node as the default */
cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
/* fill in the receive list */
possible = cpumask_weight(&info->def_intr.mask);
@ -167,8 +170,6 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
}
cpumask_copy(&info->proc.mask, cpu_online_mask);
dd->affinity = info;
return 0;
}
void hfi1_dev_affinity_free(struct hfi1_devdata *dd)

View File

@ -64,10 +64,27 @@ enum affinity_flags {
AFF_IRQ_LOCAL
};
/*
 * A CPU mask paired with bookkeeping state.
 * NOTE(review): how 'used' and 'gen' are consumed is not visible in this
 * excerpt; presumably 'used' marks CPUs already handed out from 'mask' and
 * 'gen' counts how often the set has been exhausted -- confirm against the
 * code that picks CPUs from a set.
 */
struct cpu_mask_set {
struct cpumask mask;
struct cpumask used;
/* zeroed by init_cpu_mask_set() */
uint gen;
};
/*
 * Affinity state for one device; allocated by init_real_cpu_mask() and
 * stored in dd->affinity. The remaining members are set up later by
 * hfi1_dev_affinity_init().
 */
struct hfi1_affinity {
/* default interrupt CPUs: real_cpu_mask restricted to the device-local CPUs */
struct cpu_mask_set def_intr;
/*
 * receive interrupt CPUs.
 * NOTE(review): filled in by hfi1_dev_affinity_init(); the exact selection
 * is not visible in this excerpt.
 */
struct cpu_mask_set rcv_intr;
/* proc.mask is set to a copy of cpu_online_mask during init */
struct cpu_mask_set proc;
/* online CPUs with HT siblings removed; built by init_real_cpu_mask() */
struct cpumask real_cpu_mask;
/* spin lock to protect affinity struct */
spinlock_t lock;
};
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
int init_real_cpu_mask(struct hfi1_devdata *);
/* Initialize driver affinity data */
int hfi1_dev_affinity_init(struct hfi1_devdata *);
void hfi1_dev_affinity_init(struct hfi1_devdata *);
/* Free driver affinity data */
void hfi1_dev_affinity_free(struct hfi1_devdata *);
/*

View File

@ -12656,12 +12656,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
* User contexts: (to be fixed later)
* - default to 1 user context per CPU if num_user_contexts is
* negative
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
* num_user_contexts is negative
*/
if (num_user_contexts < 0)
num_user_contexts = num_online_cpus();
num_user_contexts =
cpumask_weight(&dd->affinity->real_cpu_mask);
total_contexts = num_kernel_contexts + num_user_contexts;
@ -14089,6 +14090,19 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
/*
* The real cpu mask is part of the affinity struct but has to be
* initialized earlier than the rest of the affinity struct because it
* is needed to calculate the number of user contexts in
* set_up_context_variables(). However, hfi1_dev_affinity_init(),
* which initializes the rest of the affinity struct members,
* depends on set_up_context_variables() for the number of kernel
* contexts, so it cannot be called before set_up_context_variables().
*/
ret = init_real_cpu_mask(dd);
if (ret)
goto bail_cleanup;
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
@ -14102,9 +14116,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* set up KDETH QP prefix in both RX and TX CSRs */
init_kdeth_qp(dd);
ret = hfi1_dev_affinity_init(dd);
if (ret)
goto bail_cleanup;
hfi1_dev_affinity_init(dd);
/* send contexts must be set up before receive contexts */
ret = init_send_contexts(dd);