From 3b920cef3470b7199acd2dc26e09fb9291131976 Mon Sep 17 00:00:00 2001 From: "Hyok S. Choi" Date: Mon, 24 Apr 2006 09:45:35 +0100 Subject: [PATCH 01/22] [ARM] nommu: trivial fixups for head-nommu.S and the Makefile This patch fix compilation problem of start-up codes. (head-nommu.S, arch/arm/kernel/Makefile) Signed-off-by: Hyok S. Choi Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/head-nommu.S | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 2ce0e3a27a45..a601b8b55f35 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,7 +29,7 @@ ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o endif -head-y := head.o +head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o extra-y := $(head-y) init_task.o vmlinux.lds diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index b093ab8738b5..0bea65864051 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -20,10 +20,11 @@ #include #include #include -#include +#include #include #define PROCINFO_INITFUNC 12 +#define MACHINFO_TYPE 0 /* * Kernel startup entry point. @@ -79,5 +80,6 @@ __after_proc_init: mov pc, r13 @ clear the BSS and jump @ to start_kernel + .ltorg #include "head-common.S" From 3d63abe56be2147891b3438cb3bd37a9be72bda7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 24 Apr 2006 11:27:02 +0100 Subject: [PATCH 02/22] [MMC] pxamci: fix data timeout calculation The MMC layer gives us two parts for the timeout calculation - a fixed timeout in nanoseconds, and a card clock-speed dependent part. The PXA MMC hardware allows for a timeout based on the fixed host clock speed only. This resulted in some cards being given a short timeout, and therefore failing to work. Signed-off-by: Russell King --- drivers/mmc/pxamci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c index eb9a8826e9b5..eb42cb349420 100644 --- a/drivers/mmc/pxamci.c +++ b/drivers/mmc/pxamci.c @@ -65,11 +65,6 @@ struct pxamci_host { unsigned int dma_dir; }; -static inline unsigned int ns_to_clocks(unsigned int ns) -{ - return (ns * (CLOCKRATE / 1000000) + 999) / 1000; -} - static void pxamci_stop_clock(struct pxamci_host *host) { if (readl(host->base + MMC_STAT) & STAT_CLK_EN) { @@ -113,6 +108,7 @@ static void pxamci_disable_irq(struct pxamci_host *host, unsigned int mask) static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) { unsigned int nob = data->blocks; + unsigned long long clks; unsigned int timeout; u32 dcmd; int i; @@ -125,7 +121,9 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) writel(nob, host->base + MMC_NOB); writel(1 << data->blksz_bits, host->base + MMC_BLKLEN); - timeout = ns_to_clocks(data->timeout_ns) + data->timeout_clks; + clks = (unsigned long long)data->timeout_ns * CLOCKRATE; + do_div(clks, 1000000000UL); + timeout = (unsigned int)clks + (data->timeout_clks << host->clkrt); writel((timeout + 255) / 256, host->base + MMC_RDTO); if (data->flags & MMC_DATA_READ) { From c7fd84424f919740880d989cb0459c332da96013 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 25 Apr 2006 20:36:04 +0100 Subject: [PATCH 03/22] [ARM] 3484/1: Correct AEABI CFLAGS for correct enum handling Patch from Richard Purdie The AAPCS says that enums can be variably sized depending on the range of valid values. This is not the accepted behaviour under linux so for compatibility gcc has an aapcs-linux target, the main difference being that enums are always of type int. Change the ARM Makefile to use this target. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 95a96275f88a..6f8e84c1c1f2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -66,7 +66,7 @@ tune-$(CONFIG_CPU_XSC3) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) - tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) ifeq ($(CONFIG_AEABI),y) -CFLAGS_ABI :=-mabi=aapcs -mno-thumb-interwork +CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif From 928bd1b4709045355fad8bf858904884c0a1e87f Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Apr 2006 20:41:27 +0100 Subject: [PATCH 04/22] [ARM] vfp: fix leak of VFP_NAN_FLAG into FPSCR The VFP code can leak VFP_NAN_FLAG into the FPSCR. It doesn't correspond to any real FPSCR bit (and overlaps one of the exception flags). Bug report from Daniel Jacobowitz Signed-off-by: Russell King --- arch/arm/vfp/vfpdouble.c | 2 +- arch/arm/vfp/vfpmodule.c | 2 +- arch/arm/vfp/vfpsingle.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/vfp/vfpdouble.c b/arch/arm/vfp/vfpdouble.c index febd115dba28..009038c8113e 100644 --- a/arch/arm/vfp/vfpdouble.c +++ b/arch/arm/vfp/vfpdouble.c @@ -197,7 +197,7 @@ u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exce dd, d, exceptions); vfp_put_double(dd, d); } - return exceptions & ~VFP_NAN_FLAG; + return exceptions; } /* diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 22f3da4e0829..37ff8145b5b5 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -180,7 +180,7 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) * emulate it. */ } - return exceptions; + return exceptions & ~VFP_NAN_FLAG; } /* diff --git a/arch/arm/vfp/vfpsingle.c b/arch/arm/vfp/vfpsingle.c index 4ac27f193934..dae2c2f46052 100644 --- a/arch/arm/vfp/vfpsingle.c +++ b/arch/arm/vfp/vfpsingle.c @@ -203,7 +203,7 @@ u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exce vfp_put_float(sd, d); } - return exceptions & ~VFP_NAN_FLAG; + return exceptions; } /* From ba5f5d90c45a30e4e9a1bd136acf1b3973c905c8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 25 Apr 2006 15:33:34 +0200 Subject: [PATCH 05/22] [PATCH] splice: fix min() warning Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/splice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/splice.c b/fs/splice.c index 0559e7577a04..4aa67254740f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -283,7 +283,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, /* * this_len is the max we'll use from this page */ - this_len = min(len, PAGE_CACHE_SIZE - loff); + this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); find_page: /* * lookup the page for this index From 016b661e2f717168e600f3c85f29e1a49f88e004 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 25 Apr 2006 15:42:00 +0200 Subject: [PATCH 06/22] [PATCH] splice: fix offset problems Make the move_from_pipe() actors return number of bytes processed, then move_from_pipe() can decide more cleverly when to move on to the next buffer. This fixes problems with pipe offset and differing file offset. Signed-off-by: Jens Axboe --- fs/splice.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 4aa67254740f..8c6030c762e2 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -439,14 +439,13 @@ EXPORT_SYMBOL(generic_file_splice_read); /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' - * using sendpage(). + * using sendpage(). Return the number of bytes sent. */ static int pipe_to_sendpage(struct pipe_inode_info *info, struct pipe_buffer *buf, struct splice_desc *sd) { struct file *file = sd->file; loff_t pos = sd->pos; - unsigned int offset; ssize_t ret; void *ptr; int more; @@ -461,16 +460,13 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, if (IS_ERR(ptr)) return PTR_ERR(ptr); - offset = pos & ~PAGE_CACHE_MASK; more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; - ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); + ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len, + &pos, more); buf->ops->unmap(info, buf); - if (ret == sd->len) - return 0; - - return -EIO; + return ret; } /* @@ -499,7 +495,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct file *file = sd->file; struct address_space *mapping = file->f_mapping; gfp_t gfp_mask = mapping_gfp_mask(mapping); - unsigned int offset; + unsigned int offset, this_len; struct page *page; pgoff_t index; char *src; @@ -515,6 +511,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; + this_len = sd->len; + if (this_len + offset > PAGE_CACHE_SIZE) + this_len = PAGE_CACHE_SIZE - offset; + /* * Reuse buf page, if SPLICE_F_MOVE is set. */ @@ -558,7 +558,7 @@ find_page: * the full page. */ if (!PageUptodate(page)) { - if (sd->len < PAGE_CACHE_SIZE) { + if (this_len < PAGE_CACHE_SIZE) { ret = mapping->a_ops->readpage(file, page); if (unlikely(ret)) goto out; @@ -582,7 +582,7 @@ find_page: } } - ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); + ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); if (ret == AOP_TRUNCATED_PAGE) { page_cache_release(page); goto find_page; @@ -592,18 +592,22 @@ find_page: if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { char *dst = kmap_atomic(page, KM_USER0); - memcpy(dst + offset, src + buf->offset, sd->len); + memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); kunmap_atomic(dst, KM_USER0); } - ret = mapping->a_ops->commit_write(file, page, 0, sd->len); + ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); if (ret == AOP_TRUNCATED_PAGE) { page_cache_release(page); goto find_page; } else if (ret) goto out; + /* + * Return the number of bytes written. + */ + ret = this_len; mark_page_accessed(page); balance_dirty_pages_ratelimited(mapping); out: @@ -652,16 +656,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.len = sd.total_len; err = actor(pipe, buf, &sd); - if (err) { + if (err <= 0) { if (!ret && err != -ENODATA) ret = err; break; } - ret += sd.len; - buf->offset += sd.len; - buf->len -= sd.len; + ret += err; + buf->offset += err; + buf->len -= err; + + sd.len -= err; + sd.pos += err; + sd.total_len -= err; + if (sd.len) + continue; if (!buf->len) { buf->ops = NULL; @@ -672,8 +682,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, do_wakeup = 1; } - sd.pos += sd.len; - sd.total_len -= sd.len; if (!sd.total_len) break; } From 5a5fb1ea74d8b82ca1461b885a1334fb21e037be Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 26 Apr 2006 10:48:55 +0200 Subject: [PATCH 07/22] Revert "[fuse] fix deadlock between fuse_put_super() and request_end()" This reverts 73ce8355c243a434524a34c05cc417dd0467996e commit. It was wrong, because it didn't take into account the requirement, that iput() for background requests must be performed synchronously with ->put_super(), otherwise active inodes may remain after unmount. The right solution is to keep the sbput_sem and perform iput() within the locked region, but move fput() outside sbput_sem. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 28 ++++++++++++---------------- fs/fuse/fuse_i.h | 12 +++++++++--- fs/fuse/inode.c | 27 ++++++++++----------------- 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cc750c68fe70..4967bd40b953 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -128,14 +128,20 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) } } -void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req) +void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) { - list_del_init(&req->bg_entry); + iput(req->inode); + iput(req->inode2); + if (req->file) + fput(req->file); + spin_lock(&fc->lock); + list_del(&req->bg_entry); if (fc->num_background == FUSE_MAX_BACKGROUND) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } fc->num_background--; + spin_unlock(&fc->lock); } /* @@ -165,27 +171,17 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); fuse_put_request(fc, req); } else { - struct inode *inode = req->inode; - struct inode *inode2 = req->inode2; - struct file *file = req->file; void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; - req->inode = NULL; - req->inode2 = NULL; - req->file = NULL; - if (!list_empty(&req->bg_entry)) - fuse_remove_background(fc, req); spin_unlock(&fc->lock); - + down_read(&fc->sbput_sem); + if (fc->mounted) + fuse_release_background(fc, req); + up_read(&fc->sbput_sem); if (end) end(fc, req); else fuse_put_request(fc, req); - - if (file) - fput(file); - iput(inode); - iput(inode2); } } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 59661c481d9d..0474202cb5dc 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -258,9 +258,15 @@ struct fuse_conn { /** waitq for blocked connection */ wait_queue_head_t blocked_waitq; + /** RW semaphore for exclusion with fuse_put_super() */ + struct rw_semaphore sbput_sem; + /** The next unique request id */ u64 reqctr; + /** Mount is active */ + unsigned mounted; + /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -471,11 +477,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); void request_send_background(struct fuse_conn *fc, struct fuse_req *req); /** - * Remove request from the the background list + * Release inodes and file associated with background request */ -void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req); +void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req); -/** Abort all requests */ +/* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); /** diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 43a6fc0db8a7..fd34037b0588 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -204,26 +204,17 @@ static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); + down_write(&fc->sbput_sem); + while (!list_empty(&fc->background)) + fuse_release_background(fc, + list_entry(fc->background.next, + struct fuse_req, bg_entry)); + spin_lock(&fc->lock); + fc->mounted = 0; fc->connected = 0; - while (!list_empty(&fc->background)) { - struct fuse_req *req = list_entry(fc->background.next, - struct fuse_req, bg_entry); - struct inode *inode = req->inode; - struct inode *inode2 = req->inode2; - - /* File would hold a reference to vfsmount */ - BUG_ON(req->file); - req->inode = NULL; - req->inode2 = NULL; - fuse_remove_background(fc, req); - - spin_unlock(&fc->lock); - iput(inode); - iput(inode2); - spin_lock(&fc->lock); - } spin_unlock(&fc->lock); + up_write(&fc->sbput_sem); /* Flush all readers on this fs */ kill_fasync(&fc->fasync, SIGIO, POLL_IN); wake_up_all(&fc->waitq); @@ -395,6 +386,7 @@ static struct fuse_conn *new_conn(void) INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); INIT_LIST_HEAD(&fc->background); + init_rwsem(&fc->sbput_sem); kobj_set_kset_s(fc, connections_subsys); kobject_init(&fc->kobj); atomic_set(&fc->num_waiting, 0); @@ -549,6 +541,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_free_req; sb->s_root = root_dentry; + fc->mounted = 1; fc->connected = 1; kobject_get(&fc->kobj); file->private_data = fc; From 6dbbcb120570d747b00783820ee02d1e1bcf63de Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 26 Apr 2006 10:49:06 +0200 Subject: [PATCH 08/22] [fuse] fix deadlock between fuse_put_super() and request_end(), try #2 A deadlock was possible, when the last reference to the superblock was held due to a background request containing a file reference. Releasing the file would release the vfsmount which in turn would release the superblock. Since sbput_sem is held during the fput() and fuse_put_super() tries to acquire this same semaphore, a deadlock results. The solution is to move the fput() outside the region protected by sbput_sem. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4967bd40b953..104a62dadb94 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -128,12 +128,16 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) } } +/* + * Called with sbput_sem held for read (request_end) or write + * (fuse_put_super). By the time fuse_put_super() is finished, all + * inodes belonging to background requests must be released, so the + * iputs have to be done within the locked region. + */ void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) { iput(req->inode); iput(req->inode2); - if (req->file) - fput(req->file); spin_lock(&fc->lock); list_del(&req->bg_entry); if (fc->num_background == FUSE_MAX_BACKGROUND) { @@ -178,6 +182,11 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) if (fc->mounted) fuse_release_background(fc, req); up_read(&fc->sbput_sem); + + /* fput must go outside sbput_sem, otherwise it can deadlock */ + if (req->file) + fput(req->file); + if (end) end(fc, req); else From 8aa09a50b5d9dbdf627f79e19d72d82994348089 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 26 Apr 2006 10:49:16 +0200 Subject: [PATCH 09/22] [fuse] fix race between checking and setting file->private_data BKL does not protect against races if the task may sleep between checking and setting a value. So move checking of file->private_data near to setting it in fuse_fill_super(). Found by Al Viro. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fd34037b0588..7627022446b2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -500,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->f_op != &fuse_dev_operations) return -EINVAL; - /* Setting file->private_data can't race with other mount() - instances, since BKL is held for ->get_sb() */ - if (file->private_data) - return -EINVAL; - fc = new_conn(); if (!fc) return -ENOMEM; @@ -540,6 +535,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_free_req; + /* Setting file->private_data can't race with other mount() + instances, since BKL is held for ->get_sb() */ + err = -EINVAL; + if (file->private_data) + goto err_kobject_del; + sb->s_root = root_dentry; fc->mounted = 1; fc->connected = 1; @@ -556,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return 0; + err_kobject_del: + kobject_del(&fc->kobj); err_free_req: fuse_request_free(init_req); err_put_root: From c86d90df2676d9d212c0ab1f3c64e0abdf5b4c09 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 26 Apr 2006 10:49:26 +0200 Subject: [PATCH 10/22] [doc] add paragraph about 'fs' subsystem to sysfs.txt Signed-off-by: Miklos Szeredi --- Documentation/filesystems/sysfs.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index c8bce82ddcac..89b1d196ca80 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -246,6 +246,7 @@ class/ devices/ firmware/ net/ +fs/ devices/ contains a filesystem representation of the device tree. It maps directly to the internal kernel device tree, which is a hierarchy of @@ -264,6 +265,10 @@ drivers/ contains a directory for each device driver that is loaded for devices on that particular bus (this assumes that drivers do not span multiple bus types). +fs/ contains a directory for some filesystems. Currently each +filesystem wanting to export attributes must create its own hierarchy +below fs/ (see ./fuse.txt for an example). + More information can driver-model specific features can be found in Documentation/driver-model/. From 912d35f86781e64d73be1ef358f703c08905ac37 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 26 Apr 2006 10:59:21 +0200 Subject: [PATCH 11/22] [PATCH] Add support for the sys_vmsplice syscall sys_splice() moves data to/from pipes with a file input/output. sys_vmsplice() moves data to a pipe, with the input being a user address range instead. This uses an approach suggested by Linus, where we can hold partial ranges inside the pages[] map. Hopefully this will be useful for network receive support as well. Signed-off-by: Jens Axboe --- arch/ia64/kernel/entry.S | 1 + arch/powerpc/kernel/systbl.S | 1 + arch/powerpc/platforms/cell/spu_callbacks.c | 1 + fs/splice.c | 292 +++++++++++++++++--- include/asm-i386/unistd.h | 3 +- include/asm-ia64/unistd.h | 3 +- include/asm-powerpc/unistd.h | 3 +- include/asm-x86_64/unistd.h | 4 +- include/linux/syscalls.h | 3 + 9 files changed, 268 insertions(+), 43 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index e30798811216..bcb80ca5cf40 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1610,5 +1610,6 @@ sys_call_table: data8 sys_get_robust_list data8 sys_sync_file_range // 1300 data8 sys_tee + data8 sys_vmsplice .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 8d1522690501..0b98eea73c5e 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -324,6 +324,7 @@ COMPAT_SYS(ppoll) SYSCALL(unshare) SYSCALL(splice) SYSCALL(tee) +SYSCALL(vmsplice) /* * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index deb3afb94484..b283380a2a18 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -318,6 +318,7 @@ void *spu_syscall_table[] = { [__NR_unshare] sys_unshare, [__NR_splice] sys_splice, [__NR_tee] sys_tee, + [__NR_vmsplice] sys_vmsplice, }; long spu_sys_callback(struct spu_syscall_block *s) diff --git a/fs/splice.c b/fs/splice.c index 8c6030c762e2..0b2c1f060cae 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Passed to the actors @@ -38,6 +39,22 @@ struct splice_desc { loff_t pos; /* file position */ }; +struct partial_page { + unsigned int offset; + unsigned int len; +}; + +/* + * Passed to move_to_pipe + */ +struct splice_pipe_desc { + struct page **pages; /* page map */ + struct partial_page *partial; /* pages[] may not be contig */ + int nr_pages; /* number of pages in map */ + unsigned int flags; /* splice flags */ + struct pipe_buf_operations *ops;/* ops associated with output pipe */ +}; + /* * Attempt to steal a page from a pipe buffer. This should perhaps go into * a vm helper function, it's already simplified quite a bit by the @@ -128,6 +145,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, kunmap(buf->page); } +static void *user_page_pipe_buf_map(struct file *file, + struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return kmap(buf->page); +} + +static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + kunmap(buf->page); +} + static void page_cache_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) { @@ -143,19 +173,33 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { .get = page_cache_pipe_buf_get, }; +static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +static struct pipe_buf_operations user_page_pipe_buf_ops = { + .can_merge = 0, + .map = user_page_pipe_buf_map, + .unmap = user_page_pipe_buf_unmap, + .release = page_cache_pipe_buf_release, + .steal = user_page_pipe_buf_steal, + .get = page_cache_pipe_buf_get, +}; + /* * Pipe output worker. This sets up our pipe format with the page cache * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). */ -static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, - int nr_pages, unsigned long len, - unsigned int offset, unsigned int flags) +static ssize_t move_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) { - int ret, do_wakeup, i; + int ret, do_wakeup, page_nr; ret = 0; do_wakeup = 0; - i = 0; + page_nr = 0; if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); @@ -171,27 +215,19 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, if (pipe->nrbufs < PIPE_BUFFERS) { int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); struct pipe_buffer *buf = pipe->bufs + newbuf; - struct page *page = pages[i++]; - unsigned long this_len; - this_len = PAGE_CACHE_SIZE - offset; - if (this_len > len) - this_len = len; - - buf->page = page; - buf->offset = offset; - buf->len = this_len; - buf->ops = &page_cache_pipe_buf_ops; + buf->page = spd->pages[page_nr]; + buf->offset = spd->partial[page_nr].offset; + buf->len = spd->partial[page_nr].len; + buf->ops = spd->ops; pipe->nrbufs++; + page_nr++; + ret += buf->len; + if (pipe->inode) do_wakeup = 1; - ret += this_len; - len -= this_len; - offset = 0; - if (!--nr_pages) - break; - if (!len) + if (!--spd->nr_pages) break; if (pipe->nrbufs < PIPE_BUFFERS) continue; @@ -199,7 +235,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, break; } - if (flags & SPLICE_F_NONBLOCK) { + if (spd->flags & SPLICE_F_NONBLOCK) { if (!ret) ret = -EAGAIN; break; @@ -234,8 +270,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - while (i < nr_pages) - page_cache_release(pages[i++]); + while (page_nr < spd->nr_pages) + page_cache_release(spd->pages[page_nr++]); return ret; } @@ -246,17 +282,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, unsigned int flags) { struct address_space *mapping = in->f_mapping; - unsigned int loff, offset, nr_pages; + unsigned int loff, nr_pages; struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; struct page *page; pgoff_t index, end_index; loff_t isize; - size_t bytes; - int i, error; + size_t total_len; + int error; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &page_cache_pipe_buf_ops, + }; index = *ppos >> PAGE_CACHE_SHIFT; - loff = offset = *ppos & ~PAGE_CACHE_MASK; - nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + loff = *ppos & ~PAGE_CACHE_MASK; + nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (nr_pages > PIPE_BUFFERS) nr_pages = PIPE_BUFFERS; @@ -266,15 +309,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * read-ahead if this is a non-zero offset (we are likely doing small * chunk splice and the page is already there) for a single page. */ - if (!offset || nr_pages > 1) - do_page_cache_readahead(mapping, in, index, nr_pages); + if (!loff || spd.nr_pages > 1) + do_page_cache_readahead(mapping, in, index, spd.nr_pages); /* * Now fill in the holes: */ error = 0; - bytes = 0; - for (i = 0; i < nr_pages; i++, index++) { + total_len = 0; + for (spd.nr_pages = 0; spd.nr_pages < nr_pages; spd.nr_pages++, index++) { unsigned int this_len; if (!len) @@ -367,26 +410,29 @@ readpage: */ if (end_index == index) { loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); - if (bytes + loff > isize) { + if (total_len + loff > isize) { page_cache_release(page); break; } /* * force quit after adding this page */ - nr_pages = i; + nr_pages = spd.nr_pages; this_len = min(this_len, loff); + loff = 0; } } fill_it: - pages[i] = page; - bytes += this_len; + pages[spd.nr_pages] = page; + partial[spd.nr_pages].offset = loff; + partial[spd.nr_pages].len = this_len; len -= this_len; + total_len += this_len; loff = 0; } - if (i) - return move_to_pipe(pipe, pages, i, bytes, offset, flags); + if (spd.nr_pages) + return move_to_pipe(pipe, &spd); return error; } @@ -1018,6 +1064,174 @@ static long do_splice(struct file *in, loff_t __user *off_in, return -EINVAL; } +/* + * Map an iov into an array of pages and offset/length tupples. With the + * partial_page structure, we can map several non-contiguous ranges into + * our ones pages[] map instead of splitting that operation into pieces. + * Could easily be exported as a generic helper for other users, in which + * case one would probably want to add a 'max_nr_pages' parameter as well. + */ +static int get_iovec_page_array(const struct iovec __user *iov, + unsigned int nr_vecs, struct page **pages, + struct partial_page *partial) +{ + int buffers = 0, error = 0; + + /* + * It's ok to take the mmap_sem for reading, even + * across a "get_user()". + */ + down_read(¤t->mm->mmap_sem); + + while (nr_vecs) { + unsigned long off, npages; + void __user *base; + size_t len; + int i; + + /* + * Get user address base and length for this iovec. + */ + error = get_user(base, &iov->iov_base); + if (unlikely(error)) + break; + error = get_user(len, &iov->iov_len); + if (unlikely(error)) + break; + + /* + * Sanity check this iovec. 0 read succeeds. + */ + if (unlikely(!len)) + break; + error = -EFAULT; + if (unlikely(!base)) + break; + + /* + * Get this base offset and number of pages, then map + * in the user pages. + */ + off = (unsigned long) base & ~PAGE_MASK; + npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (npages > PIPE_BUFFERS - buffers) + npages = PIPE_BUFFERS - buffers; + + error = get_user_pages(current, current->mm, + (unsigned long) base, npages, 0, 0, + &pages[buffers], NULL); + + if (unlikely(error <= 0)) + break; + + /* + * Fill this contiguous range into the partial page map. + */ + for (i = 0; i < error; i++) { + const int plen = min_t(size_t, len, PAGE_SIZE) - off; + + partial[buffers].offset = off; + partial[buffers].len = plen; + + off = 0; + len -= plen; + buffers++; + } + + /* + * We didn't complete this iov, stop here since it probably + * means we have to move some of this into a pipe to + * be able to continue. + */ + if (len) + break; + + /* + * Don't continue if we mapped fewer pages than we asked for, + * or if we mapped the max number of pages that we have + * room for. + */ + if (error < npages || buffers == PIPE_BUFFERS) + break; + + nr_vecs--; + iov++; + } + + up_read(¤t->mm->mmap_sem); + + if (buffers) + return buffers; + + return error; +} + +/* + * vmsplice splices a user address range into a pipe. It can be thought of + * as splice-from-memory, where the regular splice is splice-from-file (or + * to file). In both cases the output is a pipe, naturally. + * + * Note that vmsplice only supports splicing _from_ user memory to a pipe, + * not the other way around. Splicing from user memory is a simple operation + * that can be supported without any funky alignment restrictions or nasty + * vm tricks. We simply map in the user memory and fill them into a pipe. + * The reverse isn't quite as easy, though. There are two possible solutions + * for that: + * + * - memcpy() the data internally, at which point we might as well just + * do a regular read() on the buffer anyway. + * - Lots of nasty vm tricks, that are neither fast nor flexible (it + * has restriction limitations on both ends of the pipe). + * + * Alas, it isn't here. + * + */ +static long do_vmsplice(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe; + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &user_page_pipe_buf_ops, + }; + + if (unlikely(!pipe)) + return -EBADF; + if (unlikely(nr_segs > UIO_MAXIOV)) + return -EINVAL; + else if (unlikely(!nr_segs)) + return 0; + + spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial); + if (spd.nr_pages <= 0) + return spd.nr_pages; + + return move_to_pipe(pipe, &spd); +} + +asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct file *file; + long error; + int fput; + + error = -EBADF; + file = fget_light(fd, &fput); + if (file) { + if (file->f_mode & FMODE_WRITE) + error = do_vmsplice(file, iov, nr_segs, flags); + + fput_light(file, fput); + } + + return error; +} + asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags) diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index d81d6cfc1bb4..eb4b152c82fc 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -321,8 +321,9 @@ #define __NR_splice 313 #define __NR_sync_file_range 314 #define __NR_tee 315 +#define __NR_vmsplice 316 -#define NR_syscalls 316 +#define NR_syscalls 317 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index a40ebec6aeeb..7107763168bf 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -290,12 +290,13 @@ #define __NR_get_robust_list 1299 #define __NR_sync_file_range 1300 #define __NR_tee 1301 +#define __NR_vmsplice 1302 #ifdef __KERNEL__ #include -#define NR_syscalls 278 /* length of syscall table */ +#define NR_syscalls 279 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index c612f1a62772..34325e292596 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -303,8 +303,9 @@ #define __NR_unshare 282 #define __NR_splice 283 #define __NR_tee 284 +#define __NR_vmsplice 285 -#define __NR_syscalls 285 +#define __NR_syscalls 286 #ifdef __KERNEL__ #define __NR__exit __NR_exit diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 98c36eae567c..feb77cb8c044 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -615,8 +615,10 @@ __SYSCALL(__NR_splice, sys_splice) __SYSCALL(__NR_tee, sys_tee) #define __NR_sync_file_range 277 __SYSCALL(__NR_sync_file_range, sys_sync_file_range) +#define __NR_vmsplice 278 +__SYSCALL(__NR_vmsplice, sys_vmsplice) -#define __NR_syscall_max __NR_sync_file_range +#define __NR_syscall_max __NR_vmsplice #ifndef __NO_STUBS diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d3ebc0e68b2b..3996960fc565 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -574,6 +574,9 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags); +asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags); + asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, From 85ca719e5756d79ea0f10c469e3762462ac45e22 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 26 Apr 2006 02:39:19 -0700 Subject: [PATCH 12/22] [BRIDGE]: allow full size vlan packets Need to allow for VLAN header when bridging. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2d24fb400e0c..56f3aa47e758 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -29,10 +30,15 @@ static inline int should_deliver(const struct net_bridge_port *p, return 1; } +static inline unsigned packet_length(const struct sk_buff *skb) +{ + return skb->len - (skb->protocol == htons(ETH_P_8021Q) ? VLAN_HLEN : 0); +} + int br_dev_queue_push_xmit(struct sk_buff *skb) { /* drop mtu oversized packets except tso */ - if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) + if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER From 00522fb41a2a9bf0f98a007c0e2b516a3873148c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 26 Apr 2006 14:39:29 +0200 Subject: [PATCH 13/22] [PATCH] splice: rearrange moving to/from pipe helpers We need these for people writing their own ->splice_read/write hooks. Signed-off-by: Jens Axboe --- fs/splice.c | 35 +++++++++++------------------------ include/linux/pipe_fs_i.h | 17 +++++++++++++++++ 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 0b2c1f060cae..447ebc0a37f3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -29,23 +29,13 @@ #include #include -/* - * Passed to the actors - */ -struct splice_desc { - unsigned int len, total_len; /* current and remaining length */ - unsigned int flags; /* splice flags */ - struct file *file; /* file to read/write */ - loff_t pos; /* file position */ -}; - struct partial_page { unsigned int offset; unsigned int len; }; /* - * Passed to move_to_pipe + * Passed to splice_to_pipe */ struct splice_pipe_desc { struct page **pages; /* page map */ @@ -192,8 +182,8 @@ static struct pipe_buf_operations user_page_pipe_buf_ops = { * Pipe output worker. This sets up our pipe format with the page cache * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). */ -static ssize_t move_to_pipe(struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) +static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) { int ret, do_wakeup, page_nr; @@ -432,7 +422,7 @@ fill_it: } if (spd.nr_pages) - return move_to_pipe(pipe, &spd); + return splice_to_pipe(pipe, &spd); return error; } @@ -666,17 +656,14 @@ out_nomem: return ret; } -typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, - struct splice_desc *); - /* * Pipe input worker. Most of this logic works like a regular pipe, the * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags, - splice_actor *actor) +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -795,7 +782,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, struct address_space *mapping = out->f_mapping; ssize_t ret; - ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { struct inode *inode = mapping->host; @@ -837,7 +824,7 @@ EXPORT_SYMBOL(generic_file_splice_write); ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); + return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); } EXPORT_SYMBOL(generic_splice_sendpage); @@ -924,7 +911,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, /* * We don't have an immediate reader, but we'll read the stuff - * out of the pipe right after the move_to_pipe(). So set + * out of the pipe right after the splice_to_pipe(). So set * PIPE_READERS appropriately. */ pipe->readers = 1; @@ -1210,7 +1197,7 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, if (spd.nr_pages <= 0) return spd.nr_pages; - return move_to_pipe(pipe, &spd); + return splice_to_pipe(pipe, &spd); } asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ef7f33c0be19..0008d4bd4059 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -61,4 +61,21 @@ void __free_pipe_info(struct pipe_inode_info *); /* from/to, of course */ #define SPLICE_F_MORE (0x04) /* expect more data */ +/* + * Passed to the actors + */ +struct splice_desc { + unsigned int len, total_len; /* current and remaining length */ + unsigned int flags; /* splice flags */ + struct file *file; /* file to read/write */ + loff_t pos; /* file position */ +}; + +typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); + +extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int, + splice_actor *); + #endif From 1ebd32fc54bd04de6b3944587f25513c0681f98e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 26 Apr 2006 14:40:08 +0200 Subject: [PATCH 14/22] [PATCH] splice: add ->splice_write support for /dev/null Useful for testing. Signed-off-by: Jens Axboe --- drivers/char/mem.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 66719f9d294c..1fa9fa157c12 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -578,6 +579,18 @@ static ssize_t write_null(struct file * file, const char __user * buf, return count; } +static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + return sd->len; +} + +static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null); +} + #ifdef CONFIG_MMU /* * For fun, we are using the MMU for this. @@ -785,6 +798,7 @@ static struct file_operations null_fops = { .llseek = null_lseek, .read = read_null, .write = write_null, + .splice_write = splice_write_null, }; #if defined(CONFIG_ISA) || !defined(__mc68000__) From c82ffb07cd1aa356c599999c4f0dc5155a91d318 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Apr 2006 07:20:48 +0100 Subject: [PATCH 15/22] [PATCH] fix leak in activate_ep_files() Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/usb/gadget/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 42b457030b03..0eb010a3f5bc 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1614,6 +1614,7 @@ static int activate_ep_files (struct dev_data *dev) data, &ep_config_operations, &data->dentry); if (!data->inode) { + usb_ep_free_request(ep, data->req); kfree (data); goto enomem; } From de0bb97aff6743f71abb8ec581238e2bdae9cdd1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Apr 2006 07:26:09 +0100 Subject: [PATCH 16/22] [PATCH] forgotten ->b_data in memcpy() call in ext3/resize.c (oopsable) sbi->s_group_desc is an array of pointers to buffer_head. memcpy() of buffer size from address of buffer_head is a bad idea - it will generate junk in any case, may oops if buffer_head is close to the end of slab page and next page is not mapped and isn't what was intended there. IOW, ->b_data is missing in that call. Fortunately, result doesn't go into the primary on-disk data structures, so only backup ones get crap written to them; that had allowed this bug to remain unnoticed until now. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/ext3/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index c5ffa8523968..8aac5334680d 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_bh; } lock_buffer(bh); - memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size); + memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); set_buffer_uptodate(gdb); unlock_buffer(bh); ext3_journal_dirty_metadata(handle, gdb); From 6ad0013b316367671ef316cff91ab2d912e309c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Apr 2006 07:28:09 +0100 Subject: [PATCH 17/22] [PATCH] fix mips sys32_p{read,write} Switched to use of sys_pread64()/sys_pwrite64() rather than keep duplicating their guts; among the little things that had been missing there were such as ret = security_file_permission (file, MAY_READ); Gotta love the LSM robustness, right? Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/mips/kernel/linux32.c | 64 ++------------------------------------ 1 file changed, 2 insertions(+), 62 deletions(-) diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 7c953bcc5f6a..a7d2bb3cf835 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -356,73 +356,13 @@ asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high, asmlinkage ssize_t sys32_pread(unsigned int fd, char __user * buf, size_t count, u32 unused, u64 a4, u64 a5) { - ssize_t ret; - struct file * file; - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); - loff_t pos; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto bad_file; - if (!(file->f_mode & FMODE_READ)) - goto out; - pos = merge_64(a4, a5); - ret = rw_verify_area(READ, file, &pos, count); - if (ret < 0) - goto out; - ret = -EINVAL; - if (!file->f_op || !(read = file->f_op->read)) - goto out; - if (pos < 0) - goto out; - ret = -ESPIPE; - if (!(file->f_mode & FMODE_PREAD)) - goto out; - ret = read(file, buf, count, &pos); - if (ret > 0) - dnotify_parent(file->f_dentry, DN_ACCESS); -out: - fput(file); -bad_file: - return ret; + return sys_pread64(fd, buf, count, merge_64(a4, a5)); } asmlinkage ssize_t sys32_pwrite(unsigned int fd, const char __user * buf, size_t count, u32 unused, u64 a4, u64 a5) { - ssize_t ret; - struct file * file; - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - loff_t pos; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto bad_file; - if (!(file->f_mode & FMODE_WRITE)) - goto out; - pos = merge_64(a4, a5); - ret = rw_verify_area(WRITE, file, &pos, count); - if (ret < 0) - goto out; - ret = -EINVAL; - if (!file->f_op || !(write = file->f_op->write)) - goto out; - if (pos < 0) - goto out; - - ret = -ESPIPE; - if (!(file->f_mode & FMODE_PWRITE)) - goto out; - - ret = write(file, buf, count, &pos); - if (ret > 0) - dnotify_parent(file->f_dentry, DN_MODIFY); -out: - fput(file); -bad_file: - return ret; + return sys_pwrite64(fd, buf, count, merge_64(a4, a5)); } asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, From a090d9132c1e53e3517111123680c15afb25c0a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Apr 2006 07:32:40 +0100 Subject: [PATCH 18/22] [PATCH] protect ext3 ioctl modifying append_only, immutable, etc. with i_mutex All modifications of ->i_flags in inodes that might be visible to somebody else must be under ->i_mutex. That patch fixes ext3 ioctl() setting S_APPEND and friends. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/ext3/ioctl.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index aaf1da17b6d4..8c22aa9a7fbb 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if (!S_ISDIR(inode->i_mode)) flags &= ~EXT3_DIRSYNC_FL; + mutex_lock(&inode->i_mutex); oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ @@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); return -EPERM; + } } /* @@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * the relevant capability. */ if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) + if (!capable(CAP_SYS_RESOURCE)) { + mutex_unlock(&inode->i_mutex); return -EPERM; + } } handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + mutex_unlock(&inode->i_mutex); return PTR_ERR(handle); + } if (IS_SYNC(inode)) handle->h_sync = 1; err = ext3_reserve_inode_write(handle, inode, &iloc); @@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, err = ext3_mark_iloc_dirty(handle, inode, &iloc); flags_err: ext3_journal_stop(handle); - if (err) + if (err) { + mutex_unlock(&inode->i_mutex); return err; + } if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) err = ext3_change_inode_journal_flag(inode, jflag); + mutex_unlock(&inode->i_mutex); return err; } case EXT3_IOC_GETVERSION: From e7edf9cdeddc0cff125e8e658216efb2ff2b2219 Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 26 Apr 2006 02:45:03 -0400 Subject: [PATCH 19/22] [PATCH] LSM: add missing hook to do_compat_readv_writev() This patch addresses a flaw in LSM, where there is no mediation of readv() and writev() in for 32-bit compatible apps using a 64-bit kernel. This bug was discovered and fixed initially in the native readv/writev code [1], but was not fixed in the compat code. Thanks to Al for spotting this one. [1] http://lwn.net/Articles/154282/ Signed-off-by: James Morris Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/compat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/compat.c b/fs/compat.c index 7f8e26ea427c..2e32bd340474 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (ret < 0) goto out; + ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE); + if (ret) + goto out; + fnv = NULL; if (type == READ) { fn = file->f_op->read; From 649bbaa484bcdce94f40a1b97a6a2ded0549e8a2 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Mon, 24 Apr 2006 19:35:15 -0700 Subject: [PATCH 20/22] [PATCH] Remove __devinitdata from notifier block definitions Few of the notifier_chain_register() callers use __devinitdata in the definition of notifier_block data structure. It is incorrect as the data structure should be available after the initializations (they do not unregister them during initializations). This was leading to an oops when notifier_chain_register() call is invoked for those callback chains after initialization. This patch fixes all such usages to _not_ have the notifier_block data structure in the init data section. Signed-off-by: Chandra Seetharaman Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/sysfs.c | 2 +- arch/s390/appldata/appldata_base.c | 2 +- block/ll_rw_blk.c | 2 +- kernel/hrtimer.c | 2 +- kernel/rcupdate.c | 2 +- kernel/sched.c | 2 +- kernel/softirq.c | 2 +- kernel/softlockup.c | 2 +- kernel/timer.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 73560ef6f802..0235eb7ecba6 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -297,7 +297,7 @@ static int __devinit sysfs_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata sysfs_cpu_nb = { +static struct notifier_block sysfs_cpu_nb = { .notifier_call = sysfs_cpu_notify, }; diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 54d35c130907..9a22434a580c 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -652,7 +652,7 @@ appldata_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata appldata_nb = { +static struct notifier_block appldata_nb = { .notifier_call = appldata_cpu_notify, }; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 1755c053fd68..e5041a02e21f 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3385,7 +3385,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, } -static struct notifier_block __devinitdata blk_cpu_notifier = { +static struct notifier_block blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d2a7296c8251..4b63bb9797b2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -860,7 +860,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata hrtimers_nb = { +static struct notifier_block hrtimers_nb = { .notifier_call = hrtimer_cpu_notify, }; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 13458bbaa1be..193e5a0df198 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -537,7 +537,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata rcu_nb = { +static struct notifier_block rcu_nb = { .notifier_call = rcu_cpu_notify, }; diff --git a/kernel/sched.c b/kernel/sched.c index 365f0b90b4de..4c64f85698ae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4814,7 +4814,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, /* Register at highest priority so that task migration (migrate_all_tasks) * happens before everything else. */ -static struct notifier_block __devinitdata migration_notifier = { +static struct notifier_block migration_notifier = { .notifier_call = migration_call, .priority = 10 }; diff --git a/kernel/softirq.c b/kernel/softirq.c index ec8fed42a86f..a13f2b342e4b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -484,7 +484,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __devinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index ced91e1ff564..dabebac50868 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -140,7 +140,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __devinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/timer.c b/kernel/timer.c index 883773788836..d355d5a4d5ae 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1334,7 +1334,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata timers_nb = { +static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; From 83d722f7e198b034699b1500d98729beff930efd Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Mon, 24 Apr 2006 19:35:21 -0700 Subject: [PATCH 21/22] [PATCH] Remove __devinit and __cpuinit from notifier_call definitions Few of the notifier_chain_register() callers use __init in the definition of notifier_call. It is incorrect as the function definition should be available after the initializations (they do not unregister them during initializations). This patch fixes all such usages to _not_ have the notifier_call __init section. Signed-off-by: Chandra Seetharaman Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- arch/ia64/kernel/palinfo.c | 2 +- arch/ia64/kernel/salinfo.c | 2 +- arch/ia64/kernel/topology.c | 2 +- arch/powerpc/kernel/sysfs.c | 2 +- arch/x86_64/kernel/mce.c | 2 +- arch/x86_64/kernel/mce_amd.c | 2 +- drivers/base/topology.c | 2 +- drivers/cpufreq/cpufreq.c | 2 +- kernel/hrtimer.c | 2 +- kernel/profile.c | 2 +- kernel/rcupdate.c | 2 +- kernel/softirq.c | 2 +- kernel/softlockup.c | 2 +- kernel/timer.c | 2 +- kernel/workqueue.c | 2 +- mm/page_alloc.c | 2 +- mm/slab.c | 2 +- mm/vmscan.c | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 9df87b03612c..c8547a6fa7e6 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -642,7 +642,7 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) return; } -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, +static int cacheinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 6386f63c413e..859fb37ff49b 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -959,7 +959,7 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -static int __devinit palinfo_cpu_callback(struct notifier_block *nfb, +static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 9d5a823479a3..663a186ad194 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -572,7 +572,7 @@ static struct file_operations salinfo_data_fops = { }; #ifdef CONFIG_HOTPLUG_CPU -static int __devinit +static int salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { unsigned int i, cpu = (unsigned long)hcpu; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index b47476d655f1..7da4739f536e 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -429,7 +429,7 @@ static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) * When a cpu is hot-plugged, do a check and initiate * cache kobject if necessary */ -static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, +static int cache_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 0235eb7ecba6..ed737cacf92d 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -279,7 +279,7 @@ static void unregister_cpu_online(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit sysfs_cpu_notify(struct notifier_block *self, +static int sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 6f0790e8b6d3..c69fc43cee7b 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -629,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) #endif /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int +static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d3ad7d81266d..d13b241ad094 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -482,7 +482,7 @@ static void threshold_remove_device(unsigned int cpu) #endif /* get notified when a cpu comes on/off */ -static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, +static int threshold_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { /* cpu was unsigned int to begin with */ diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 915810f6237e..8c52421cbc54 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -107,7 +107,7 @@ static int __cpuinit topology_remove_dev(struct sys_device * sys_dev) return 0; } -static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, +static int topology_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9759d05b1972..29b2fa5534ae 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1497,7 +1497,7 @@ int cpufreq_update_policy(unsigned int cpu) } EXPORT_SYMBOL(cpufreq_update_policy); -static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, +static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 4b63bb9797b2..b7f0388bd71c 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -836,7 +836,7 @@ static void migrate_hrtimers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit hrtimer_cpu_notify(struct notifier_block *self, +static int hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/profile.c b/kernel/profile.c index 5a730fdb1a2c..68afe121e507 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -299,7 +299,7 @@ out: } #ifdef CONFIG_HOTPLUG_CPU -static int __devinit profile_cpu_callback(struct notifier_block *info, +static int profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { int node, cpu = (unsigned long)__cpu; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 193e5a0df198..6d32ff26f948 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -520,7 +520,7 @@ static void __devinit rcu_online_cpu(int cpu) tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); } -static int __devinit rcu_cpu_notify(struct notifier_block *self, +static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/softirq.c b/kernel/softirq.c index a13f2b342e4b..336f92d64e2e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit cpu_callback(struct notifier_block *nfb, +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index dabebac50868..14c7faf02909 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu) /* * Create/destroy watchdog threads as CPUs come and go: */ -static int __devinit +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; diff --git a/kernel/timer.c b/kernel/timer.c index d355d5a4d5ae..67eaf0f54096 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1314,7 +1314,7 @@ static void __devinit migrate_timers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit timer_cpu_notify(struct notifier_block *self, +static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e9e464a90376..880fb415a8f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -547,7 +547,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) } /* We're holding the cpucontrol mutex here */ -static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, +static int workqueue_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 123c60586740..ea77c999047e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1962,7 +1962,7 @@ static inline void free_zone_pagesets(int cpu) } } -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, +static int pageset_cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/mm/slab.c b/mm/slab.c index e6ef9bd52335..af5c5237e11a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1036,7 +1036,7 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) #endif -static int __devinit cpuup_callback(struct notifier_block *nfb, +static int cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/mm/vmscan.c b/mm/vmscan.c index acdf001d6941..4649a63a8cb6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1328,7 +1328,7 @@ repeat: not required for correctness. So if the last cpu in a node goes away, we get changed to run anywhere: as the first one comes back, restore their cpu bindings. */ -static int __devinit cpu_callback(struct notifier_block *nfb, +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { pg_data_t *pgdat; From 2be4d50295e2b6f62c07b614e1b103e280dddb84 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 26 Apr 2006 19:19:25 -0700 Subject: [PATCH 22/22] Linux v2.6.17-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a940eaefe1c4..6bf99624bd4c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 17 -EXTRAVERSION =-rc2 +EXTRAVERSION =-rc3 NAME=Sliding Snow Leopard # *DOCUMENTATION*