Merge branch 'perf/urgent' into perf/core

Conflicts:
	tools/perf/util/python.c

Merge reason: resolve the conflict with perf/urgent.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar 2011-06-04 12:28:05 +02:00
commit 3ce2a0bc9d
47 changed files with 844 additions and 203 deletions

View file

@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
With this option on every unmap_single operation will
result in a hardware IOTLB flush operation as opposed
to batching them for performance.
sp_off [Default Off]
By default, super page will be supported if Intel IOMMU
has the capability. With this option, super page will
not be supported.
intremap= [X86-64, Intel-IOMMU]
Format: { on (default) | off | nosid }
on enable Interrupt Remapping (default)

View file

@ -1,5 +1,5 @@
# This creates the demonstration utility "lguest" which runs a Linux guest.
# Missing headers? Add "-I../../include -I../../arch/x86/include"
# Missing headers? Add "-I../../../include -I../../../arch/x86/include"
CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
all: lguest

View file

@ -49,7 +49,7 @@
#include <linux/virtio_rng.h>
#include <linux/virtio_ring.h>
#include <asm/bootparam.h>
#include "../../include/linux/lguest_launcher.h"
#include "../../../include/linux/lguest_launcher.h"
/*L:110
* We can ignore the 42 include files we need for this program, but I do want
* to draw attention to the use of kernel-style types.
@ -135,9 +135,6 @@ struct device {
/* Is it operational */
bool running;
/* Does Guest want an intrrupt on empty? */
bool irq_on_empty;
/* Device-specific data. */
void *priv;
};
@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq)
/* If they don't want an interrupt, don't send one... */
if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
/* ... unless they've asked us to force one on empty. */
if (!vq->dev->irq_on_empty
|| lg_last_avail(vq) != vq->vring.avail->idx)
return;
return;
}
/* Send the Guest an interrupt tell them we used something up. */
@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq)
close(vq->eventfd);
}
static bool accepted_feature(struct device *dev, unsigned int bit)
{
const u8 *features = get_feature_bits(dev) + dev->feature_len;
if (dev->feature_len < bit / CHAR_BIT)
return false;
return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
}
static void start_device(struct device *dev)
{
unsigned int i;
@ -1079,8 +1064,6 @@ static void start_device(struct device *dev)
verbose(" %02x", get_feature_bits(dev)
[dev->feature_len+i]);
dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
for (vq = dev->vq; vq; vq = vq->next) {
if (vq->service)
create_thread(vq);
@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg)
/* Set up the tun device. */
configure_device(ipfd, tapif, ip);
add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
/* Expect Guest to handle everything except UFO */
add_feature(dev, VIRTIO_NET_F_CSUM);
add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);

View file

@ -320,11 +320,12 @@
#define __NR_clock_adjtime 1328
#define __NR_syncfs 1329
#define __NR_setns 1330
#define __NR_sendmmsg 1331
#ifdef __KERNEL__
#define NR_syscalls 307 /* length of syscall table */
#define NR_syscalls 308 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about

View file

@ -1776,6 +1776,7 @@ sys_call_table:
data8 sys_clock_adjtime
data8 sys_syncfs
data8 sys_setns // 1330
data8 sys_sendmmsg
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */

View file

@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = {
static int __init init_pmacpic_syscore(void)
{
register_syscore_ops(&pmacpic_syscore_ops);
if (pmac_irq_hw[0])
register_syscore_ops(&pmacpic_syscore_ops);
return 0;
}

View file

@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp)
GCOV_PROFILE_vsyscall_64.o := n
GCOV_PROFILE_hpet.o := n
GCOV_PROFILE_tsc.o := n
GCOV_PROFILE_vread_tsc_64.o := n
GCOV_PROFILE_paravirt.o := n
# vread_tsc_64 is hot and should be fully optimized:

View file

@ -642,7 +642,7 @@ static int __init idle_setup(char *str)
boot_option_idle_override = IDLE_POLL;
} else if (!strcmp(str, "mwait")) {
boot_option_idle_override = IDLE_FORCE_MWAIT;
WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n");
WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
} else if (!strcmp(str, "halt")) {
/*
* When the boot option of idle=halt is added, halt is

View file

@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
return;
if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;

View file

@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
static void lguest_time_init(void)
{
/* Set up the timer interrupt (0) to go to our simple timer routine */
lguest_setup_irq(0);
irq_set_handler(0, lguest_time_irq);
clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);

View file

@ -321,7 +321,6 @@ static void pcd_init_units(void)
strcpy(disk->disk_name, cd->name); /* umm... */
disk->fops = &pcd_bdops;
disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
disk->events = DISK_EVENT_MEDIA_CHANGE;
}
}

View file

@ -6,10 +6,13 @@
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#define PART_BITS 4
static int major, index;
struct workqueue_struct *virtblk_wq;
struct virtio_blk
{
@ -26,6 +29,9 @@ struct virtio_blk
mempool_t *pool;
/* Process context for config space updates */
struct work_struct config_work;
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
sizeof(vbr->in_hdr));
}
@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev,
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
static void virtblk_config_changed_work(struct work_struct *work)
{
struct virtio_blk *vblk =
container_of(work, struct virtio_blk, config_work);
struct virtio_device *vdev = vblk->vdev;
struct request_queue *q = vblk->disk->queue;
char cap_str_2[10], cap_str_10[10];
u64 capacity, size;
/* Host must always specify the capacity. */
vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
&capacity, sizeof(capacity));
/* If capacity is too big, truncate with warning. */
if ((sector_t)capacity != capacity) {
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
(unsigned long long)capacity);
capacity = (sector_t)-1;
}
size = capacity * queue_logical_block_size(q);
string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
dev_notice(&vdev->dev,
"new size: %llu %d-byte logical blocks (%s/%s)\n",
(unsigned long long)capacity,
queue_logical_block_size(q),
cap_str_10, cap_str_2);
set_capacity(vblk->disk, capacity);
}
static void virtblk_config_changed(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
queue_work(virtblk_wq, &vblk->config_work);
}
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
/* We expect one virtqueue, for output. */
vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
flush_work(&vblk->config_work);
/* Nothing should be pending. */
BUG_ON(!list_empty(&vblk->reqs));
@ -508,27 +557,47 @@ static unsigned int features[] = {
* Use __refdata to avoid this warning.
*/
static struct virtio_driver __refdata virtio_blk = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtblk_probe,
.remove = __devexit_p(virtblk_remove),
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtblk_probe,
.remove = __devexit_p(virtblk_remove),
.config_changed = virtblk_config_changed,
};
static int __init init(void)
{
int error;
virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
if (!virtblk_wq)
return -ENOMEM;
major = register_blkdev(0, "virtblk");
if (major < 0)
return major;
return register_virtio_driver(&virtio_blk);
if (major < 0) {
error = major;
goto out_destroy_workqueue;
}
error = register_virtio_driver(&virtio_blk);
if (error)
goto out_unregister_blkdev;
return 0;
out_unregister_blkdev:
unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
destroy_workqueue(virtblk_wq);
return error;
}
static void __exit fini(void)
{
unregister_blkdev(major, "virtblk");
unregister_virtio_driver(&virtio_blk);
destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

View file

@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
gendisk->fops = &viocd_fops;
gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
gendisk->events = DISK_EVENT_MEDIA_CHANGE;
set_capacity(gendisk, 0);
gendisk->private_data = d;
d->viocd_disk = gendisk;

View file

@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
portdev->config.max_nr_ports = 1;
if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
multiport = true;
vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
vdev->config->get(vdev, offsetof(struct virtio_console_config,
max_nr_ports),
&portdev->config.max_nr_ports,
sizeof(portdev->config.max_nr_ports));
}
/* Let the Host know we support multiple ports.*/
vdev->config->finalize_features(vdev);
err = init_vqs(portdev);
if (err < 0) {
dev_err(&vdev->dev, "Error %d initializing vqs\n", err);

View file

@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive)
ide_cd_read_toc(drive, &sense);
g->fops = &idecd_ops;
g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
g->events = DISK_EVENT_MEDIA_CHANGE;
add_disk(g);
return 0;

View file

@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg)
return 1;
}
/* Readjust the instruction pointer if needed */
instruction_pointer_set(&kgdbts_regs, ip + offset);
ip += offset;
#ifdef GDB_ADJUSTS_BREAK_OFFSET
instruction_pointer_set(&kgdbts_regs, ip);
#endif
return 0;
}

View file

@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
* before it gets out of hand. Naturally, this wastes entries. */
if (capacity < 2+MAX_SKB_FRAGS) {
netif_stop_queue(dev);
if (unlikely(!virtqueue_enable_cb(vi->svq))) {
if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
/* More just got used, free them then recheck. */
capacity += free_old_xmit_skbs(vi);
if (capacity >= 2+MAX_SKB_FRAGS) {

View file

@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
{
#ifdef CONFIG_INTR_REMAP
struct acpi_table_dmar *dmar;
/*
* for now we will disable dma-remapping when interrupt
* remapping is enabled.
* When support for queued invalidation for IOTLB invalidation
* is added, we will not need this any more.
*/
dmar = (struct acpi_table_dmar *) dmar_tbl;
if (ret && cpu_has_x2apic && dmar->flags & 0x1)
printk(KERN_INFO

View file

@ -47,6 +47,8 @@
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
#define IS_BRIDGE_HOST_DEVICE(pdev) \
((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
return (pfn + level_size(level) - 1) & level_mask(level);
}
static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
return 1 << ((lvl - 1) * LEVEL_STRIDE);
}
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@ -142,6 +149,12 @@ static struct intel_iommu **g_iommus;
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
/*
* set to 1 to panic kernel if can't successfully enable VT-d
* (used when kernel is launched w/ TXT)
*/
static int force_on = 0;
/*
* 0: Present
* 1-11: Reserved
@ -338,6 +351,9 @@ struct dmar_domain {
int iommu_coherency;/* indicate coherency of iommu access */
int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
};
@ -387,6 +403,7 @@ int dmar_disabled = 1;
static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable batched IOTLB flush\n");
intel_iommu_strict = 1;
} else if (!strncmp(str, "sp_off", 6)) {
printk(KERN_INFO
"Intel-IOMMU: disable supported super page\n");
intel_iommu_superpage = 0;
}
str += strcspn(str, ",");
@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
}
}
static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
int i, mask = 0xf;
if (!intel_iommu_superpage) {
domain->iommu_superpage = 0;
return;
}
domain->iommu_superpage = 4; /* 1TiB */
for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
mask |= cap_super_page_val(g_iommus[i]->cap);
if (!mask) {
break;
}
}
domain->iommu_superpage = fls(mask);
}
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain_update_iommu_snooping(domain);
domain_update_iommu_superpage(domain);
}
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
@ -689,23 +731,31 @@ out:
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
unsigned long pfn)
unsigned long pfn, int large_level)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(domain->agaw);
int offset;
int offset, target_level;
BUG_ON(!domain->pgd);
BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
parent = domain->pgd;
/* Search pte */
if (!large_level)
target_level = 1;
else
target_level = large_level;
while (level > 0) {
void *tmp_page;
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
if (level == 1)
if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
break;
if (level == target_level)
break;
if (!dma_pte_present(pte)) {
@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
unsigned long pfn,
int level)
int level, int *large_page)
{
struct dma_pte *parent, *pte = NULL;
int total = agaw_to_level(domain->agaw);
@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
if (level == total)
return pte;
if (!dma_pte_present(pte))
if (!dma_pte_present(pte)) {
*large_page = total;
break;
}
if (pte->val & DMA_PTE_LARGE_PAGE) {
*large_page = total;
return pte;
}
parent = phys_to_virt(dma_pte_addr(pte));
total--;
}
@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long last_pfn)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned int large_page = 1;
struct dma_pte *first_pte, *pte;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
/* we don't need lock here; nobody else touches the iova range */
do {
first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
large_page = 1;
first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
if (!pte) {
start_pfn = align_to_level(start_pfn + 1, 2);
start_pfn = align_to_level(start_pfn + 1, large_page + 1);
continue;
}
do {
do {
dma_clear_pte(pte);
start_pfn++;
start_pfn += lvl_to_nr_pages(large_page);
pte++;
} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
int total = agaw_to_level(domain->agaw);
int level;
unsigned long tmp;
int large_page = 2;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
return;
do {
first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
large_page = level;
first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
if (large_page > level)
level = large_page + 1;
if (!pte) {
tmp = align_to_level(tmp + 1, level + 1);
continue;
@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_snooping = 0;
domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
domain->iommu_count = 1;
domain->nid = iommu->node;
@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain)
return;
/* Flush any lazy unmaps that may reference this domain */
if (!intel_iommu_strict)
flush_unmaps_timeout(0);
domain_remove_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
unsigned long iov_pfn,
unsigned long phy_pfn,
unsigned long pages)
{
int support, level = 1;
unsigned long pfnmerge;
support = domain->iommu_superpage;
/* To use a large page, the virtual *and* physical addresses
must be aligned to 2MiB/1GiB/etc. Lower bits set in either
of them will mean we have to use smaller pages. So just
merge them and check both at once. */
pfnmerge = iov_pfn | phy_pfn;
while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
pages >>= VTD_STRIDE_SHIFT;
if (!pages)
break;
pfnmerge >>= VTD_STRIDE_SHIFT;
level++;
support--;
}
return level;
}
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
struct scatterlist *sg, unsigned long phys_pfn,
unsigned long nr_pages, int prot)
@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
phys_addr_t uninitialized_var(pteval);
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned long sg_res;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
}
while (nr_pages--) {
while (nr_pages > 0) {
uint64_t tmp;
if (!sg_res) {
@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
sg->dma_length = sg->length;
pteval = page_to_phys(sg_page(sg)) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
if (!pte) {
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
if (!pte)
return -ENOMEM;
/* It is large page*/
if (largepage_lvl > 1)
pteval |= DMA_PTE_LARGE_PAGE;
else
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
/* We don't need lock here, nobody else
* touches the iova range
@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
}
WARN_ON(1);
}
lvl_pages = lvl_to_nr_pages(largepage_lvl);
BUG_ON(nr_pages < lvl_pages);
BUG_ON(sg_res < lvl_pages);
nr_pages -= lvl_pages;
iov_pfn += lvl_pages;
phys_pfn += lvl_pages;
pteval += lvl_pages * VTD_PAGE_SIZE;
sg_res -= lvl_pages;
/* If the next PTE would be the first in a new page, then we
need to flush the cache on the entries we've just written.
And then we'll need to recalculate 'pte', so clear it and
let it get set again in the if (!pte) block above.
If we're done (!nr_pages) we need to flush the cache too.
Also if we've been setting superpages, we may need to
recalculate 'pte' and switch back to smaller pages for the
end of the mapping, if the trailing size is not enough to
use another superpage (i.e. sg_res < lvl_pages). */
pte++;
if (!nr_pages || first_pte_in_page(pte)) {
if (!nr_pages || first_pte_in_page(pte) ||
(largepage_lvl > 1 && sg_res < lvl_pages)) {
domain_flush_cache(domain, first_pte,
(void *)pte - (void *)first_pte);
pte = NULL;
}
iov_pfn++;
pteval += VTD_PAGE_SIZE;
sg_res--;
if (!sg_res)
if (!sg_res && nr_pages)
sg = sg_next(sg);
}
return 0;
@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return 0;
return iommu_prepare_identity_map(pdev, rmrr->base_address,
rmrr->end_address + 1);
rmrr->end_address);
}
#ifdef CONFIG_DMAR_FLOPPY_WA
@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
return;
printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
if (ret)
printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
if (likely(!iommu_identity_mapping))
return 0;
info = pdev->dev.archdata.iommu;
if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
return (info->domain == si_domain);
list_for_each_entry(info, &si_domain->devices, link)
if (info->dev == pdev)
return 1;
return 0;
}
@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
* Assume that they will -- if they turn out not to be, then we can
* take them out of the 1:1 domain later.
*/
if (!startup)
return pdev->dma_mask > DMA_BIT_MASK(32);
if (!startup) {
/*
* If the device's dma_mask is less than the system's memory
* size then this is not a candidate for identity mapping.
*/
u64 dma_mask = pdev->dma_mask;
if (pdev->dev.coherent_dma_mask &&
pdev->dev.coherent_dma_mask < dma_mask)
dma_mask = pdev->dev.coherent_dma_mask;
return dma_mask >= dma_get_required_mask(&pdev->dev);
}
return 1;
}
@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
return -EFAULT;
for_each_pci_dev(pdev) {
/* Skip Host/PCI Bridge devices */
if (IS_BRIDGE_HOST_DEVICE(pdev))
continue;
if (iommu_should_identity_map(pdev, 1)) {
printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
hw ? "hardware" : "software", pci_name(pdev));
@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
return 0;
}
static int __init init_dmars(int force_on)
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
struct dmar_rmrr_unit *rmrr;
@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
iommu = domain_get_iommu(domain);
size = aligned_nrpages(paddr, size);
iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
pdev->dma_mask);
iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
if (!iova)
goto error;
@ -3118,7 +3263,17 @@ static int init_iommu_hw(void)
if (iommu->qi)
dmar_reenable_qi(iommu);
for_each_active_iommu(iommu, drhd) {
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
/*
* we always have to disable PMRs or DMA may fail on
* this device
*/
if (force_on)
iommu_disable_protect_mem_regions(iommu);
continue;
}
iommu_flush_write_buffer(iommu);
iommu_set_root_entry(iommu);
@ -3127,7 +3282,8 @@ static int init_iommu_hw(void)
DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
if (iommu_enable_translation(iommu))
return 1;
iommu_disable_protect_mem_regions(iommu);
}
@ -3194,7 +3350,10 @@ static void iommu_resume(void)
unsigned long flag;
if (init_iommu_hw()) {
WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
if (force_on)
panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
else
WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
return;
}
@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = {
int __init intel_iommu_init(void)
{
int ret = 0;
int force_on = 0;
/* VT-d is required for a TXT/tboot launch, so enforce that */
force_on = tboot_force_iommu();
@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void)
init_no_remapping_devices();
ret = init_dmars(force_on);
ret = init_dmars();
if (ret) {
if (force_on)
panic("tboot: Failed to initialize DMARs\n");
@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_safe(entry, tmp, &domain->devices) {
info = list_entry(entry, struct device_domain_info, link);
/* No need to compare PCI domain; it has to be the same */
if (info->bus == pdev->bus->number &&
if (info->segment == pci_domain_nr(pdev->bus) &&
info->bus == pdev->bus->number &&
info->devfn == pdev->devfn) {
list_del(&info->link);
list_del(&info->global);
@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
spin_lock_irqsave(&iommu->lock, tmp_flags);
clear_bit(domain->id, iommu->domain_ids);
iommu->domains[domain->id] = NULL;
spin_unlock_irqrestore(&iommu->lock, tmp_flags);
if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
!(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
spin_lock_irqsave(&iommu->lock, tmp_flags);
clear_bit(domain->id, iommu->domain_ids);
iommu->domains[domain->id] = NULL;
spin_unlock_irqrestore(&iommu->lock, tmp_flags);
}
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_count = 0;
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
domain->iommu_superpage = 0;
domain->max_addr = 0;
domain->nid = -1;
@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
struct dma_pte *pte;
u64 phys = 0;
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
if (pte)
phys = dma_pte_addr(pte);

View file

@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
curr = iovad->cached32_node;
cached_iova = container_of(curr, struct iova, node);
if (free->pfn_lo >= cached_iova->pfn_lo)
iovad->cached32_node = rb_next(&free->node);
if (free->pfn_lo >= cached_iova->pfn_lo) {
struct rb_node *node = rb_next(&free->node);
struct iova *iova = container_of(node, struct iova, node);
/* only cache if it's below 32bit pfn */
if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
iovad->cached32_node = node;
else
iovad->cached32_node = NULL;
}
}
/* Computes the padding size required, to make the

View file

@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net)
}
mutex_lock(&vq->mutex);
vhost_disable_notify(vq);
vhost_disable_notify(&net->dev, vq);
if (wmem < sock->sk->sk_sndbuf / 2)
tx_poll_stop(net);
@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net)
set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
break;
}
if (unlikely(vhost_enable_notify(vq))) {
vhost_disable_notify(vq);
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
vhost_disable_notify(&net->dev, vq);
continue;
}
break;
@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net)
return;
mutex_lock(&vq->mutex);
vhost_disable_notify(vq);
vhost_disable_notify(&net->dev, vq);
vhost_hlen = vq->vhost_hlen;
sock_hlen = vq->sock_hlen;
@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net)
break;
/* OK, now we need to know about added descriptors. */
if (!headcount) {
if (unlikely(vhost_enable_notify(vq))) {
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
/* They have slipped one in as we were
* doing that: check again. */
vhost_disable_notify(vq);
vhost_disable_notify(&net->dev, vq);
continue;
}
/* Nothing new? Wait for eventfd to tell us

View file

@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n)
return;
mutex_lock(&vq->mutex);
vhost_disable_notify(vq);
vhost_disable_notify(&n->dev, vq);
for (;;) {
head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n)
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
if (head == vq->num) {
if (unlikely(vhost_enable_notify(vq))) {
vhost_disable_notify(vq);
if (unlikely(vhost_enable_notify(&n->dev, vq))) {
vhost_disable_notify(&n->dev, vq);
continue;
}
break;

View file

@ -37,6 +37,9 @@ enum {
VHOST_MEMORY_F_LOG = 0x1,
};
#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->last_avail_idx = 0;
vq->avail_idx = 0;
vq->last_used_idx = 0;
vq->signalled_used = 0;
vq->signalled_used_valid = false;
vq->used_flags = 0;
vq->log_used = false;
vq->log_addr = -1ull;
@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
return 1;
}
static int vq_access_ok(unsigned int num,
static int vq_access_ok(struct vhost_dev *d, unsigned int num,
struct vring_desc __user *desc,
struct vring_avail __user *avail,
struct vring_used __user *used)
{
size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
access_ok(VERIFY_READ, avail,
sizeof *avail + num * sizeof *avail->ring) &&
sizeof *avail + num * sizeof *avail->ring + s) &&
access_ok(VERIFY_WRITE, used,
sizeof *used + num * sizeof *used->ring);
sizeof *used + num * sizeof *used->ring + s);
}
/* Can we log writes? */
@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev)
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
void __user *log_base)
{
struct vhost_memory *mp;
size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
mp = rcu_dereference_protected(vq->dev->memory,
lockdep_is_held(&vq->mutex));
@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr,
sizeof *vq->used +
vq->num * sizeof *vq->used->ring));
vq->num * sizeof *vq->used->ring + s));
}
/* Can we start vq? */
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) &&
vq_log_access_ok(vq, vq->log_base);
return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
vq_log_access_ok(vq->dev, vq, vq->log_base);
}
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq,
if (r)
return r;
vq->signalled_used_valid = false;
return get_user(vq->last_used_idx, &used->idx);
}
@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
* If it is not, we don't as size might not have been setup.
* We will verify when backend is configured. */
if (vq->private_data) {
if (!vq_access_ok(vq->num,
if (!vq_access_ok(d, vq->num,
(void __user *)(unsigned long)a.desc_user_addr,
(void __user *)(unsigned long)a.avail_user_addr,
(void __user *)(unsigned long)a.used_user_addr)) {
@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
vq = d->vqs + i;
mutex_lock(&vq->mutex);
/* If ring is inactive, will check when it's enabled. */
if (vq->private_data && !vq_log_access_ok(vq, base))
if (vq->private_data && !vq_log_access_ok(d, vq, base))
r = -EFAULT;
else
vq->log_base = base;
@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
/* On success, increment avail index. */
vq->last_avail_idx++;
/* Assume notifications from guest are disabled at this point,
* if they aren't we would need to update avail_event index. */
BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
return head;
}
@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
eventfd_signal(vq->log_ctx, 1);
}
vq->last_used_idx++;
/* If the driver never bothers to signal in a very long while,
* used index might wrap around. If that happens, invalidate
* signalled_used index we stored. TODO: make sure driver
* signals at least once in 2^16 and remove this. */
if (unlikely(vq->last_used_idx == vq->signalled_used))
vq->signalled_used_valid = false;
return 0;
}
@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
unsigned count)
{
struct vring_used_elem __user *used;
u16 old, new;
int start;
start = vq->last_used_idx % vq->num;
@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
((void __user *)used - (void __user *)vq->used),
count * sizeof *used);
}
vq->last_used_idx += count;
old = vq->last_used_idx;
new = (vq->last_used_idx += count);
/* If the driver never bothers to signal in a very long while,
* used index might wrap around. If that happens, invalidate
* signalled_used index we stored. TODO: make sure driver
* signals at least once in 2^16 and remove this. */
if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
vq->signalled_used_valid = false;
return 0;
}
@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
return r;
}
/* This actually signals the guest, using eventfd. */
void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
__u16 flags;
__u16 old, new, event;
bool v;
/* Flush out used index updates. This is paired
* with the barrier that the Guest executes when enabling
* interrupts. */
smp_mb();
if (__get_user(flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
return;
if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
unlikely(vq->avail_idx == vq->last_avail_idx))
return true;
if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
__u16 flags;
if (__get_user(flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
return true;
}
return !(flags & VRING_AVAIL_F_NO_INTERRUPT);
}
old = vq->signalled_used;
v = vq->signalled_used_valid;
new = vq->signalled_used = vq->last_used_idx;
vq->signalled_used_valid = true;
/* If they don't want an interrupt, don't signal, unless empty. */
if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
(vq->avail_idx != vq->last_avail_idx ||
!vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
return;
if (unlikely(!v))
return true;
if (get_user(event, vhost_used_event(vq))) {
vq_err(vq, "Failed to get used event idx");
return true;
}
return vring_need_event(event, new, old);
}
/* This actually signals the guest, using eventfd. */
void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
/* Signal the Guest tell them we used something up. */
if (vq->call_ctx)
if (vq->call_ctx && vhost_notify(dev, vq))
eventfd_signal(vq->call_ctx, 1);
}
@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
}
/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_virtqueue *vq)
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
u16 avail_idx;
int r;
@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
return false;
vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
r = put_user(vq->used_flags, &vq->used->flags);
if (r) {
vq_err(vq, "Failed to enable notification at %p: %d\n",
&vq->used->flags, r);
return false;
if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
r = put_user(vq->used_flags, &vq->used->flags);
if (r) {
vq_err(vq, "Failed to enable notification at %p: %d\n",
&vq->used->flags, r);
return false;
}
} else {
r = put_user(vq->avail_idx, vhost_avail_event(vq));
if (r) {
vq_err(vq, "Failed to update avail event index at %p: %d\n",
vhost_avail_event(vq), r);
return false;
}
}
if (unlikely(vq->log_used)) {
void __user *used;
/* Make sure data is seen before log. */
smp_wmb();
used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ?
&vq->used->flags : vhost_avail_event(vq);
/* Log used flags or event index entry write. Both are 16 bit
* fields. */
log_write(vq->log_base, vq->log_addr +
(used - (void __user *)vq->used),
sizeof(u16));
if (vq->log_ctx)
eventfd_signal(vq->log_ctx, 1);
}
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
}
/* We don't need to be notified again. */
void vhost_disable_notify(struct vhost_virtqueue *vq)
void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
int r;
if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
return;
vq->used_flags |= VRING_USED_F_NO_NOTIFY;
r = put_user(vq->used_flags, &vq->used->flags);
if (r)
vq_err(vq, "Failed to enable notification at %p: %d\n",
&vq->used->flags, r);
if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
r = put_user(vq->used_flags, &vq->used->flags);
if (r)
vq_err(vq, "Failed to enable notification at %p: %d\n",
&vq->used->flags, r);
}
}

View file

@ -84,6 +84,12 @@ struct vhost_virtqueue {
/* Used flags */
u16 used_flags;
/* Last used index value we have signalled on */
u16 signalled_used;
/* Last used index value we have signalled on */
bool signalled_used_valid;
/* Log writes to used structure. */
bool log_used;
u64 log_addr;
@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
struct vring_used_elem *heads, unsigned count);
void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
void vhost_disable_notify(struct vhost_virtqueue *);
bool vhost_enable_notify(struct vhost_virtqueue *);
void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len);
@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
} while (0)
enum {
VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
(1 << VIRTIO_RING_F_INDIRECT_DESC) |
(1 << VHOST_F_LOG_ALL) |
(1 << VHOST_NET_F_VIRTIO_NET_HDR) |
(1 << VIRTIO_NET_F_MRG_RXBUF),
VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
(1ULL << VIRTIO_RING_F_EVENT_IDX) |
(1ULL << VHOST_F_LOG_ALL) |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF),
};
static inline int vhost_has_feature(struct vhost_dev *dev, int bit)

View file

@ -40,9 +40,6 @@ struct virtio_balloon
/* Waiting for host to ack the pages we released. */
struct completion acked;
/* Do we have to tell Host *before* we reuse pages? */
bool tell_host_first;
/* The pages we've told the Host we're not using. */
unsigned int num_pages;
struct list_head pages;
@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
vb->num_pages--;
}
if (vb->tell_host_first) {
tell_host(vb, vb->deflate_vq);
release_pages_by_pfn(vb->pfns, vb->num_pfns);
} else {
release_pages_by_pfn(vb->pfns, vb->num_pfns);
tell_host(vb, vb->deflate_vq);
}
/*
* Note that if
* virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
* is true, we *have* to do it in this order
*/
tell_host(vb, vb->deflate_vq);
release_pages_by_pfn(vb->pfns, vb->num_pfns);
}
static inline void update_stat(struct virtio_balloon *vb, int idx,
@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_del_vqs;
}
vb->tell_host_first
= virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
return 0;
out_del_vqs:

View file

@ -82,6 +82,9 @@ struct vring_virtqueue
/* Host supports indirect buffers */
bool indirect;
/* Host publishes avail event idx */
bool event;
/* Number of free buffers */
unsigned int num_free;
/* Head of free buffer list. */
@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
void virtqueue_kick(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
u16 new, old;
START_USE(vq);
/* Descriptors and available array need to be set before we expose the
* new available array entries. */
virtio_wmb();
vq->vring.avail->idx += vq->num_added;
old = vq->vring.avail->idx;
new = vq->vring.avail->idx = old + vq->num_added;
vq->num_added = 0;
/* Need to update avail index before checking if we should notify */
virtio_mb();
if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
if (vq->event ?
vring_need_event(vring_avail_event(&vq->vring), new, old) :
!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
/* Prod other side to tell it about changes. */
vq->notify(&vq->vq);
@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
ret = vq->data[i];
detach_buf(vq, i);
vq->last_used_idx++;
/* If we expect an interrupt for the next entry, tell host
* by writing event index and flush out the write before
* the read in the next get_buf call. */
if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
vring_used_event(&vq->vring) = vq->last_used_idx;
virtio_mb();
}
END_USE(vq);
return ret;
}
@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
/* We optimistically turn back on interrupts, then check if there was
* more to do. */
/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
vring_used_event(&vq->vring) = vq->last_used_idx;
virtio_mb();
if (unlikely(more_used(vq))) {
END_USE(vq);
@ -357,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
u16 bufs;
START_USE(vq);
/* We optimistically turn back on interrupts, then check if there was
* more to do. */
/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
/* TODO: tune this threshold */
bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
virtio_mb();
if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
END_USE(vq);
return false;
}
END_USE(vq);
return true;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
@ -438,6 +484,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
#endif
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
/* No callback? Tell other side not to bother us. */
if (!callback)
@ -472,6 +519,8 @@ void vring_transport_features(struct virtio_device *vdev)
switch (i) {
case VIRTIO_RING_F_INDIRECT_DESC:
break;
case VIRTIO_RING_F_EVENT_IDX:
break;
default:
/* We don't understand this bit. */
clear_bit(i, vdev->features);

View file

@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
return -EACCES;
dentry_unhash(dentry);
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)

View file

@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
if (error)
goto out;
shrink_dcache_parent(dentry);
error = dir->i_op->rmdir(dir, dentry);
if (error)
goto out;
@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
goto out;
if (target)
shrink_dcache_parent(new_dentry);
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
goto out;

View file

@ -9,8 +9,12 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
#define DMA_PTE_LARGE_PAGE (1 << 7)
#define DMA_PTE_SNP (1 << 11)
#define CONTEXT_TT_MULTI_LEVEL 0

View file

@ -19,6 +19,7 @@
#include <linux/mtd/partitions.h>
struct map_info;
struct platform_device;
struct physmap_flash_data {
unsigned int width;

View file

@ -51,6 +51,13 @@ struct virtqueue {
* This re-enables callbacks; it returns "false" if there are pending
* buffers in the queue, to detect a possible race between the driver
* checking for more work, and enabling callbacks.
* virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
* vq: the struct virtqueue we're talking about.
* This re-enables callbacks but hints to the other side to delay
* interrupts until most of the available buffers have been processed;
* it returns "false" if there are many pending buffers in the queue,
* to detect a possible race between the driver checking for more work,
* and enabling callbacks.
* virtqueue_detach_unused_buf: detach first unused buffer
* vq: the struct virtqueue we're talking about.
* Returns NULL or the "data" token handed to add_buf
@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
void *virtqueue_detach_unused_buf(struct virtqueue *vq);
/**

View file

@ -1,7 +1,30 @@
#ifndef _LINUX_VIRTIO_9P_H
#define _LINUX_VIRTIO_9P_H
/* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers. */
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#include <linux/types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>

View file

@ -1,7 +1,30 @@
#ifndef _LINUX_VIRTIO_BALLOON_H
#define _LINUX_VIRTIO_BALLOON_H
/* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers. */
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>

View file

@ -1,7 +1,30 @@
#ifndef _LINUX_VIRTIO_BLK_H
#define _LINUX_VIRTIO_BLK_H
/* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers. */
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#include <linux/types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>

View file

@ -1,7 +1,30 @@
#ifndef _LINUX_VIRTIO_CONFIG_H
#define _LINUX_VIRTIO_CONFIG_H
/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
* anyone can use the definitions to implement compatible drivers/servers. */
* anyone can use the definitions to implement compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
/* Virtio devices use a standardized configuration space to define their
* features and pass configuration information, but each implementation can

View file

@ -5,7 +5,31 @@
#include <linux/virtio_config.h>
/*
* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
* anyone can use the definitions to implement compatible drivers/servers.
* anyone can use the definitions to implement compatible drivers/servers:
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright (C) Red Hat, Inc., 2009, 2010, 2011
* Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011

View file

@ -5,7 +5,29 @@
*
* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers.
*/
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#define VIRTIO_ID_NET 1 /* virtio net */
#define VIRTIO_ID_BLOCK 2 /* virtio block */

View file

@ -1,7 +1,30 @@
#ifndef _LINUX_VIRTIO_NET_H
#define _LINUX_VIRTIO_NET_H
/* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers. */
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#include <linux/types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>

View file

@ -11,6 +11,29 @@
*
* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _LINUX_VIRTIO_PCI_H

View file

@ -7,6 +7,29 @@
* This header is BSD licensed so anyone can use the definitions to implement
* compatible drivers/servers.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright Rusty Russell IBM Corporation 2007. */
#include <linux/types.h>
@ -29,6 +52,12 @@
/* We support indirect buffer descriptors */
#define VIRTIO_RING_F_INDIRECT_DESC 28
/* The Guest publishes the used index for which it expects an interrupt
* at the end of the avail ring. Host should ignore the avail->flags field. */
/* The Host publishes the avail index for which it expects a kick
* at the end of the used ring. Guest should ignore the used->flags field. */
#define VIRTIO_RING_F_EVENT_IDX 29
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc {
/* Address (guest-physical). */
@ -83,6 +112,7 @@ struct vring {
* __u16 avail_flags;
* __u16 avail_idx;
* __u16 available[num];
* __u16 used_event_idx;
*
* // Padding to the next align boundary.
* char pad[];
@ -91,8 +121,14 @@ struct vring {
* __u16 used_flags;
* __u16 used_idx;
* struct vring_used_elem used[num];
* __u16 avail_event_idx;
* };
*/
/* We publish the used event index at the end of the available ring, and vice
* versa. They are at the end for backwards compatibility. */
#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
static inline void vring_init(struct vring *vr, unsigned int num, void *p,
unsigned long align)
{
@ -107,7 +143,21 @@ static inline unsigned vring_size(unsigned int num, unsigned long align)
{
return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
+ align - 1) & ~(align - 1))
+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
+ sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
}
/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
/* Assuming a given event_idx value from the other size, if
* we have just incremented index from old to new_idx,
* should we trigger an event? */
static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
{
/* Note: Xen has similar logic for notification hold-off
* in include/xen/interface/io/ring.h with req_event and req_prod
* corresponding to event_idx + 1 and new_idx respectively.
* Note also that req_event and req_prod in Xen start at 1,
* event indexes in virtio start at 0. */
return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
}
#ifdef __KERNEL__

View file

@ -7388,26 +7388,12 @@ static int __perf_cgroup_move(void *info)
return 0;
}
static void perf_cgroup_move(struct task_struct *task)
static void
perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task)
{
task_function_call(task, __perf_cgroup_move, task);
}
static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct cgroup *old_cgrp, struct task_struct *task,
bool threadgroup)
{
perf_cgroup_move(task);
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
perf_cgroup_move(c);
}
rcu_read_unlock();
}
}
static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct cgroup *old_cgrp, struct task_struct *task)
{
@ -7419,7 +7405,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
if (!(task->flags & PF_EXITING))
return;
perf_cgroup_move(task);
perf_cgroup_attach_task(cgrp, task);
}
struct cgroup_subsys perf_subsys = {
@ -7428,6 +7414,6 @@ struct cgroup_subsys perf_subsys = {
.create = perf_cgroup_create,
.destroy = perf_cgroup_destroy,
.exit = perf_cgroup_exit,
.attach = perf_cgroup_attach,
.attach_task = perf_cgroup_attach_task,
};
#endif /* CONFIG_CGROUP_PERF */

View file

@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
if (IS_ERR(t))
return PTR_ERR(t);
kthread_bind(t, cpu);
set_task_state(t, TASK_INTERRUPTIBLE);
per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
per_cpu(rcu_cpu_kthread_task, cpu) = t;
@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
if (IS_ERR(t))
return PTR_ERR(t);
raw_spin_lock_irqsave(&rnp->lock, flags);
set_task_state(t, TASK_INTERRUPTIBLE);
rnp->node_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
sp.sched_priority = 99;
@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
}
static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
/*
* Spawn all kthreads -- called as soon as the scheduler is running.
*/
@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void)
{
int cpu;
struct rcu_node *rnp;
struct task_struct *t;
rcu_kthreads_spawnable = 1;
for_each_possible_cpu(cpu) {
per_cpu(rcu_cpu_has_work, cpu) = 0;
if (cpu_online(cpu))
if (cpu_online(cpu)) {
(void)rcu_spawn_one_cpu_kthread(cpu);
t = per_cpu(rcu_cpu_kthread_task, cpu);
if (t)
wake_up_process(t);
}
}
rnp = rcu_get_root(rcu_state);
(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
if (rnp->node_kthread_task)
wake_up_process(rnp->node_kthread_task);
if (NUM_RCU_NODES > 1) {
rcu_for_each_leaf_node(rcu_state, rnp)
rcu_for_each_leaf_node(rcu_state, rnp) {
(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
t = rnp->node_kthread_task;
if (t)
wake_up_process(t);
rcu_wake_one_boost_kthread(rnp);
}
}
return 0;
}
@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
static void __cpuinit rcu_online_cpu(int cpu)
static void __cpuinit rcu_prepare_cpu(int cpu)
{
rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
rcu_preempt_init_percpu_data(cpu);
}
static void __cpuinit rcu_online_kthreads(int cpu)
static void __cpuinit rcu_prepare_kthreads(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
@ -2208,6 +2220,31 @@ static void __cpuinit rcu_online_kthreads(int cpu)
}
}
/*
* kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
* but the RCU threads are woken on demand, and if demand is low this
* could be a while triggering the hung task watchdog.
*
* In order to avoid this, poke all tasks once the CPU is fully
* up and running.
*/
static void __cpuinit rcu_online_kthreads(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct task_struct *t;
t = per_cpu(rcu_cpu_kthread_task, cpu);
if (t)
wake_up_process(t);
t = rnp->node_kthread_task;
if (t)
wake_up_process(t);
rcu_wake_one_boost_kthread(rnp);
}
/*
* Handle CPU online/offline notification events.
*/
@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
rcu_online_cpu(cpu);
rcu_online_kthreads(cpu);
rcu_prepare_cpu(cpu);
rcu_prepare_kthreads(cpu);
break;
case CPU_ONLINE:
rcu_online_kthreads(cpu);
case CPU_DOWN_FAILED:
rcu_node_kthread_setaffinity(rnp, -1);
rcu_cpu_kthread_setrt(cpu, 1);

View file

@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
if (IS_ERR(t))
return PTR_ERR(t);
raw_spin_lock_irqsave(&rnp->lock, flags);
set_task_state(t, TASK_INTERRUPTIBLE);
rnp->boost_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
sp.sched_priority = RCU_KTHREAD_PRIO;
@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return 0;
}
static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
{
if (rnp->boost_kthread_task)
wake_up_process(rnp->boost_kthread_task);
}
#else /* #ifdef CONFIG_RCU_BOOST */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return 0;
}
static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
{
}
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifndef CONFIG_SMP

View file

@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = {
},
#endif
#ifdef CONFIG_PERF_EVENTS
/*
* User-space scripts rely on the existence of this file
* as a feature check for perf_events being enabled.
*
* So it's an ABI, do not remove!
*/
{
.procname = "perf_event_paranoid",
.data = &sysctl_perf_event_paranoid,

View file

@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (should_fail_alloc_page(gfp_mask, order))
return NULL;
#ifndef CONFIG_ZONE_DMA
if (WARN_ON_ONCE(gfp_mask & __GFP_DMA))
return NULL;
#endif
/*
* Check the zones suitable for the gfp_mask contain at least one

View file

@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
sa.aad.op = OP_SETPROCATTR;
sa.aad.info = name;
sa.aad.error = -EINVAL;
return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL,
return aa_audit(AUDIT_APPARMOR_DENIED,
__aa_current_profile(), GFP_KERNEL,
&sa, NULL);
}
} else if (strcmp(name, "exec") == 0) {

View file

@ -197,6 +197,14 @@ const struct option longopts[] = {
.name = "help",
.val = 'h',
},
{
.name = "event-idx",
.val = 'E',
},
{
.name = "no-event-idx",
.val = 'e',
},
{
.name = "indirect",
.val = 'I',
@ -211,13 +219,17 @@ const struct option longopts[] = {
static void help()
{
fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n");
fprintf(stderr, "Usage: virtio_test [--help]"
" [--no-indirect]"
" [--no-event-idx]"
"\n");
}
int main(int argc, char **argv)
{
struct vdev_info dev;
unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
(1ULL << VIRTIO_RING_F_EVENT_IDX);
int o;
for (;;) {
@ -228,6 +240,9 @@ int main(int argc, char **argv)
case '?':
help();
exit(2);
case 'e':
features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
break;
case 'h':
help();
goto done;