KVM: PPC: Book3S HV: Allow userspace to set the desired SMT mode

This allows userspace to set the desired virtual SMT (simultaneous
multithreading) mode for a VM, that is, the number of VCPUs that
get assigned to each virtual core.  Previously, the virtual SMT mode
was fixed to the number of threads per subcore, and if userspace
wanted to have fewer vcpus per vcore, then it would achieve that by
using a sparse CPU numbering.  This had the disadvantage that the
vcpu numbers can get quite large, particularly for SMT1 guests on
a POWER8 with 8 threads per core.  With this patch, userspace can
set its desired virtual SMT mode and then use contiguous vcpu
numbering.

On POWER8, where the threading mode is "strict", the virtual SMT mode
must be less than or equal to the number of threads per subcore.  On
POWER9, which implements a "loose" threading mode, the virtual SMT
mode can be any power of 2 between 1 and 8, even though there is
effectively one thread per subcore, since the threads are independent
and can all be in different partitions.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
Paul Mackerras 2017-02-06 13:24:41 +11:00
parent 769377f77c
commit 3c31352460
5 changed files with 90 additions and 12 deletions

View file

@ -3996,6 +3996,21 @@ Parameters: none
Allow use of adapter-interruption suppression.
Returns: 0 on success; -EBUSY if a VCPU has already been created.
7.11 KVM_CAP_PPC_SMT
Architectures: ppc
Parameters: vsmt_mode, flags
Enabling this capability on a VM provides userspace with a way to set
the desired virtual SMT mode (i.e. the number of virtual CPUs per
virtual core). The virtual SMT mode, vsmt_mode, must be a power of 2
between 1 and 8. On POWER8, vsmt_mode must also be no greater than
the number of threads per subcore for the host. Currently flags must
be 0. A successful call to enable this capability will result in
vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is
subsequently queried for the VM. This capability is only supported by
HV KVM, and can only be set before any VCPUs have been created.
8. Other capabilities.
----------------------

View file

@ -267,6 +267,7 @@ struct kvm_resize_hpt;
struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode; /* # vcpus per virtual core */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
struct kvm_hpt_info hpt;

View file

@ -315,6 +315,8 @@ struct kvmppc_ops {
struct irq_bypass_producer *);
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
unsigned long flags);
};
extern struct kvmppc_ops *kvmppc_hv_ops;

View file

@ -1628,7 +1628,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_vcore();
vcore->first_vcpuid = core * kvm->arch.smt_mode;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@ -1787,14 +1787,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
int err = -EINVAL;
int err;
int core;
struct kvmppc_vcore *vcore;
core = id / threads_per_vcore();
if (core >= KVM_MAX_VCORES)
goto out;
err = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
@ -1842,11 +1838,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcpu->arch.cpu_run);
mutex_lock(&kvm->lock);
vcore = kvm->arch.vcores[core];
if (!vcore) {
vcore = kvmppc_vcore_create(kvm, core);
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
vcore = NULL;
err = -EINVAL;
core = id / kvm->arch.smt_mode;
if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core];
if (!vcore) {
err = -ENOMEM;
vcore = kvmppc_vcore_create(kvm, core);
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
}
}
mutex_unlock(&kvm->lock);
@ -1874,6 +1876,40 @@ out:
return ERR_PTR(err);
}
static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
unsigned long flags)
{
int err;
if (flags)
return -EINVAL;
if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
return -EINVAL;
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
/*
* On POWER8 (or POWER7), the threading mode is "strict",
* so we pack smt_mode vcpus per vcore.
*/
if (smt_mode > threads_per_subcore)
return -EINVAL;
} else {
/*
* On POWER9, the threading mode is "loose",
* so each vcpu gets its own vcore.
*/
smt_mode = 1;
}
mutex_lock(&kvm->lock);
err = -EBUSY;
if (!kvm->arch.online_vcores) {
kvm->arch.smt_mode = smt_mode;
err = 0;
}
mutex_unlock(&kvm->lock);
return err;
}
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
@ -3553,6 +3589,18 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
* Initialize smt_mode depending on processor.
* POWER8 and earlier have to use "strict" threading, where
* all vCPUs in a vcore have to run on the same (sub)core,
* whereas on POWER9 the threads can each run a different
* guest.
*/
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm->arch.smt_mode = threads_per_subcore;
else
kvm->arch.smt_mode = 1;
/*
* Create a debugfs directory for the VM
*/
@ -3982,6 +4030,7 @@ static struct kvmppc_ops kvm_ops_hv = {
#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
};
static int kvm_init_subcore_bitmap(void)

View file

@ -554,7 +554,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
r = 0;
if (hv_enabled) {
if (kvm)
r = kvm->arch.smt_mode;
else if (hv_enabled) {
if (cpu_has_feature(CPU_FTR_ARCH_300))
r = 1;
else
@ -1712,6 +1714,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
break;
}
case KVM_CAP_PPC_SMT: {
unsigned long mode = cap->args[0];
unsigned long flags = cap->args[1];
r = -EINVAL;
if (kvm->arch.kvm_ops->set_smt_mode)
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
break;
}
#endif
default:
r = -EINVAL;