Merge branch 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: e500: include linux/export.h
  KVM: PPC: fix kvmppc_start_thread() for CONFIG_SMP=N
  KVM: PPC: protect use of kvmppc_h_pr
  KVM: PPC: move compute_tlbie_rb to book3s_64 common header
  KVM: Don't automatically expose the TSC deadline timer in cpuid
  KVM: Device assignment permission checks
  KVM: Remove ability to assign a device without iommu support
  KVM: x86: Prevent starting PIT timers in the absence of irqchip support
This commit is contained in:
Linus Torvalds 2011-12-26 13:17:00 -08:00
commit 7f54492fbc
10 changed files with 154 additions and 56 deletions

View file

@ -1100,6 +1100,15 @@ emulate them efficiently. The fields in each entry are defined as follows:
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
this function/index combination
The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
support. Instead, it is reported via
ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER).
If that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
4.47 KVM_PPC_GET_PVINFO
Capability: KVM_CAP_PPC_GET_PVINFO
@ -1151,6 +1160,13 @@ following flags are specified:
/* Depends on KVM_CAP_IOMMU */
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
isolation of the device. Requests that do not specify this flag are
rejected with -EINVAL.
Only PCI header type 0 devices with PCI BAR resources are supported by
device assignment. The user requesting this ioctl must have read/write
access to the PCI sysfs resource files associated with the device.
4.49 KVM_DEASSIGN_PCI_DEVICE
Capability: KVM_CAP_DEVICE_DEASSIGNMENT

View file

@ -381,39 +381,6 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
}
#endif
/*
 * Assemble an "rb" value from a hashed page table entry (presumably the
 * RB operand later handed to a tlbie instruction, going by the name --
 * confirm against the callers).
 *
 * v:         HPTE word that is tested against the HPTE_V_* flags
 *            (HPTE_V_SECONDARY, HPTE_V_1TB_SEG, HPTE_V_LARGE) and that
 *            supplies most of the result bits.
 * r:         second HPTE word; only the bits in mask 0xff000 are examined,
 *            and only for large pages.
 * pte_index: index of the entry; pte_index >> 3 is folded back into the
 *            low virtual-address bits.
 *
 * Returns the assembled rb value.  No state other than the arguments and
 * cpu_has_feature(CPU_FTR_ARCH_206) is consulted.
 */
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
unsigned long rb, va_low;
/* Clear the low 7 bits of v and move the rest up by 16 bits. */
rb = (v & ~0x7fUL) << 16; /* AVA field */
/* Discard the low 3 bits of the index; the remainder is the hash part. */
va_low = pte_index >> 3;
/* Entries flagged HPTE_V_SECONDARY use the complemented value. */
if (v & HPTE_V_SECONDARY)
va_low = ~va_low;
/* xor vsid from AVA */
/* The shift applied to v depends on the HPTE_V_1TB_SEG flag: 12 or 24. */
if (!(v & HPTE_V_1TB_SEG))
va_low ^= v >> 12;
else
va_low ^= v >> 24;
/* Only the low 11 bits are meaningful from here on. */
va_low &= 0x7ff;
if (v & HPTE_V_LARGE) {
rb |= 1; /* L field */
/*
 * Extra encoding is added only when the CPU reports
 * CPU_FTR_ARCH_206 and r has any bit set in 0xff000.
 */
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
(r & 0xff000)) {
/* non-16MB large page, must be 64k */
/* (masks depend on page size) */
rb |= 0x1000; /* page encoding in LP field */
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
}
} else {
/* 4kB page */
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
}
/* Bits 0x300 of (v >> 54) are copied in for every page size. */
rb |= (v >> 54) & 0x300; /* B field */
return rb;
}
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA

View file

@ -29,4 +29,37 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
#define SPAPR_TCE_SHIFT 12
/*
 * Assemble the "rb" value for a hashed page table entry.
 *
 * v:         HPTE word carrying the HPTE_V_* flags and the AVA bits
 * r:         second HPTE word; only mask 0xff000 is inspected, and only
 *            when the entry is a large page
 * pte_index: index of the entry; pte_index >> 3 is mixed back into the
 *            low virtual-address bits
 *
 * Returns the assembled value; nothing besides the arguments and
 * cpu_has_feature(CPU_FTR_ARCH_206) is read.
 */
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
					     unsigned long pte_index)
{
	/* The amount v is shifted by before the xor depends on the segment flag */
	unsigned long vsid_shift = (v & HPTE_V_1TB_SEG) ? 24 : 12;
	unsigned long hash = pte_index >> 3;
	unsigned long low_va;
	unsigned long result;

	/* Secondary entries use the complemented hash */
	if (v & HPTE_V_SECONDARY)
		hash = ~hash;

	/* xor vsid from AVA, keeping 11 bits */
	low_va = (hash ^ (v >> vsid_shift)) & 0x7ff;

	/* Fields that do not depend on the page size */
	result = (v & ~0x7fUL) << 16;		/* AVA field */
	result |= (v >> 54) & 0x300;		/* B field */

	if (!(v & HPTE_V_LARGE)) {
		/* 4kB page: low_va is already limited to 11 bits */
		result |= low_va << 12;		/* remaining 11b of VA */
		return result;
	}

	result |= 1;				/* L field */

	/* non-16MB large page, must be 64k (masks depend on page size) */
	if (cpu_has_feature(CPU_FTR_ARCH_206) && (r & 0xff000)) {
		result |= 0x1000;		/* page encoding in LP field */
		result |= (low_va & 0x7f) << 16; /* 7b of VA in AVA/LP field */
		result |= low_va & 0xfe;	/* AVAL field (P7 doesn't seem to care) */
	}

	return result;
}
#endif /* __ASM_KVM_BOOK3S_64_H__ */

View file

@ -538,7 +538,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
tpaca->kvm_hstate.napping = 0;
vcpu->cpu = vc->pcpu;
smp_wmb();
#ifdef CONFIG_PPC_ICP_NATIVE
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (vcpu->arch.ptid) {
tpaca->cpu_start = 0x80;
wmb();

View file

@ -658,10 +658,12 @@ program_interrupt:
ulong cmd = kvmppc_get_gpr(vcpu, 3);
int i;
#ifdef CONFIG_KVM_BOOK3S_64_PR
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
r = RESUME_GUEST;
break;
}
#endif
run->papr_hcall.nr = cmd;
for (i = 0; i < 9; ++i) {

View file

@ -15,6 +15,7 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/export.h>
#include <asm/reg.h>
#include <asm/cputable.h>

View file

@ -338,11 +338,15 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
{
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
struct kvm_timer *pt = &ps->pit_timer;
s64 interval;
if (!irqchip_in_kernel(kvm))
return;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
pr_debug("create pit timer, interval is %llu nsec\n", interval);
@ -394,13 +398,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
/* FIXME: enhance mode 4 precision */
case 4:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
create_pit_timer(ps, val, 0);
create_pit_timer(kvm, val, 0);
}
break;
case 2:
case 3:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
create_pit_timer(ps, val, 1);
create_pit_timer(kvm, val, 1);
}
break;
default:

View file

@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
u32 timer_mode_mask;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
best->ecx |= bit(X86_FEATURE_OSXSAVE);
}
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
best->function == 0x1) {
best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
timer_mode_mask = 3 << 17;
} else
timer_mode_mask = 1 << 17;
if (apic)
apic->lapic_timer.timer_mode_mask = timer_mode_mask;
if (apic) {
if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
else
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
}
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
break;
case KVM_CAP_TSC_DEADLINE_TIMER:
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
break;
default:
r = 0;
break;

View file

@ -557,6 +557,7 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
#define KVM_CAP_PPC_PAPR 68
#define KVM_CAP_S390_GMAP 71
#define KVM_CAP_TSC_DEADLINE_TIMER 72
#ifdef KVM_CAP_IRQ_ROUTING

View file

@ -17,6 +17,8 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
@ -480,12 +482,76 @@ out:
return r;
}
/*
 * Check whether the caller has been granted permission to use this
 * device.
 *
 * Configuring and controlling a device requires access to its PCI
 * configuration space and its BAR resources, both of which are reached
 * through PCI sysfs.  Config space is often handed to the process that
 * issues this ioctl as an already-open file descriptor, so access to that
 * file cannot be relied upon.  The BAR resource files can be checked
 * instead: read/write permission on every one of them is a pretty clear
 * indicator that the user has been granted access to the device.
 *
 * Returns 0 when every populated standard BAR passes the check, -EPERM
 * when the device has no populated standard BAR, -ENOMEM on allocation
 * failure, or the error reported by the path lookup or permission check.
 * Without CONFIG_SYSFS the answer is always -EINVAL.
 */
#ifdef CONFIG_SYSFS
/* Check read/write permission on the sysfs resource file of one BAR. */
static int probe_bar_permission(struct pci_dev *dev, int bar)
{
	struct path res_path;
	char *kobj_path, *res_name;
	int err;

	kobj_path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
	if (!kobj_path)
		return -ENOMEM;

	/* Per sysfs-rules, sysfs is always at /sys */
	res_name = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kobj_path, bar);
	kfree(kobj_path);
	if (!res_name)
		return -ENOMEM;

	err = kern_path(res_name, LOOKUP_FOLLOW, &res_path);
	kfree(res_name);
	if (err)
		return err;

	err = inode_permission(res_path.dentry->d_inode,
			       MAY_READ | MAY_WRITE | MAY_ACCESS);
	path_put(&res_path);

	return err;
}

static int probe_sysfs_permissions(struct pci_dev *dev)
{
	bool any_bar = false;
	int bar;

	for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
		int err;

		/* Unpopulated BARs have no resource file to test */
		if (!pci_resource_len(dev, bar))
			continue;

		err = probe_bar_permission(dev, bar);
		if (err)
			return err;

		any_bar = true;
	}

	/* If no resources, probably something special */
	return any_bar ? 0 : -EPERM;
}
#else
static int probe_sysfs_permissions(struct pci_dev *dev)
{
	return -EINVAL; /* No way to control the device without sysfs */
}
#endif
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0, idx;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
u8 header_type;
if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
return -EINVAL;
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
@ -513,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
r = -EINVAL;
goto out_free;
}
/* Don't allow bridges to be assigned */
pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
r = -EPERM;
goto out_put;
}
r = probe_sysfs_permissions(dev);
if (r)
goto out_put;
if (pci_enable_device(dev)) {
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
r = -EBUSY;
@ -544,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
}
r = kvm_assign_device(kvm, match);
if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
}
r = kvm_assign_device(kvm, match);
if (r)
goto out_list_del;
out:
srcu_read_unlock(&kvm->srcu, idx);
@ -593,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
goto out;
}
if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
kvm_deassign_device(kvm, match);
kvm_deassign_device(kvm, match);
kvm_free_assigned_device(kvm, match);