From 7909b1c64078087ac153fb47a2f50793fe3ee7d0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 Jul 2010 14:41:40 +0200 Subject: [PATCH 1/3] fuse: don't use atomic kmap Don't use atomic kmap for mapping userspace buffers in device read/write/splice. This is necessary because the next patch (adding store notify) requires that caller of fuse_copy_page() may sleep between invocations. The simplest way to ensure this is to change the atomic kmaps to non-atomic ones. Thankfully architectures where kmap() is not a no-op are going out of fashion, so we can ignore the (probably negligible) performance impact of this change. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9424796d6634..7eb80d33c4f3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -535,13 +535,13 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) if (!cs->write) { buf->ops->unmap(cs->pipe, buf, cs->mapaddr); } else { - kunmap_atomic(cs->mapaddr, KM_USER0); + kunmap(buf->page); buf->len = PAGE_SIZE - cs->len; } cs->currbuf = NULL; cs->mapaddr = NULL; } else if (cs->mapaddr) { - kunmap_atomic(cs->mapaddr, KM_USER0); + kunmap(cs->pg); if (cs->write) { flush_dcache_page(cs->pg); set_page_dirty_lock(cs->pg); @@ -572,7 +572,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) BUG_ON(!cs->nr_segs); cs->currbuf = buf; - cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->mapaddr = buf->ops->map(cs->pipe, buf, 0); cs->len = buf->len; cs->buf = cs->mapaddr + buf->offset; cs->pipebufs++; @@ -592,7 +592,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) buf->len = 0; cs->currbuf = buf; - cs->mapaddr = kmap_atomic(page, KM_USER0); + cs->mapaddr = kmap(page); cs->buf = cs->mapaddr; cs->len = PAGE_SIZE; cs->pipebufs++; @@ -611,7 +611,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) return err; BUG_ON(err != 1); offset = cs->addr % PAGE_SIZE; - cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); + cs->mapaddr = kmap(cs->pg); cs->buf = cs->mapaddr + offset; cs->len = min(PAGE_SIZE - offset, cs->seglen); cs->seglen -= cs->len; From a1d75f258230b75d46aecdf28b2e732413028863 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 Jul 2010 14:41:40 +0200 Subject: [PATCH 2/3] fuse: add store request Userspace filesystem can request data to be stored in the inode's mapping. This request is synchronous and has no reply. If the write to the fuse device returns an error then the store request was not fully completed (but may have updated some pages). If the stored data overflows the current file size, then the size is extended, similarly to a write(2) on the filesystem. Pages which have been completely stored are marked uptodate. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 88 ++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/file.c | 2 +- fs/fuse/fuse_i.h | 2 + include/linux/fuse.h | 13 ++++++- 4 files changed, 103 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 7eb80d33c4f3..8e01c865586e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1231,6 +1231,91 @@ err: return err; } +static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_store_out outarg; + struct inode *inode; + struct address_space *mapping; + u64 nodeid; + int err; + pgoff_t index; + unsigned int offset; + unsigned int num; + loff_t file_size; + loff_t end; + + err = -EINVAL; + if (size < sizeof(outarg)) + goto out_finish; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto out_finish; + + err = -EINVAL; + if (size - sizeof(outarg) != outarg.size) + goto out_finish; + + nodeid = outarg.nodeid; + + down_read(&fc->killsb); + + err = -ENOENT; + if (!fc->sb) + goto out_up_killsb; + + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + goto out_up_killsb; + + mapping = inode->i_mapping; + index = outarg.offset >> PAGE_CACHE_SHIFT; + offset = outarg.offset & ~PAGE_CACHE_MASK; + file_size = i_size_read(inode); + end = outarg.offset + outarg.size; + if (end > file_size) { + file_size = end; + fuse_write_update_size(inode, file_size); + } + + num = outarg.size; + while (num) { + struct page *page; + unsigned int this_num; + + err = -ENOMEM; + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + if (!page) + goto out_iput; + + this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); + err = fuse_copy_page(cs, &page, offset, this_num, 0); + if (!err && offset == 0 && (num != 0 || file_size == end)) + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + + if (err) + goto out_iput; + + num -= this_num; + offset = 0; + index++; + } + + err = 0; + +out_iput: + iput(inode); +out_up_killsb: + up_read(&fc->killsb); +out_finish: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -1244,6 +1329,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_INVAL_ENTRY: return fuse_notify_inval_entry(fc, size, cs); + case FUSE_NOTIFY_STORE: + return fuse_notify_store(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ada0adeb3bb5..147c1f71bdb9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -706,7 +706,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, return 0; } -static void fuse_write_update_size(struct inode *inode, loff_t pos) +void fuse_write_update_size(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 8f309f04064e..61267d8d527b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -748,4 +748,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); +void fuse_write_update_size(struct inode *inode, loff_t pos); + #endif /* _FS_FUSE_I_H */ diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 88e0eb596919..a90bd49834aa 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -37,6 +37,9 @@ * * 7.14 * - add splice support to fuse device + * + * 7.15 + * - add store notify */ #ifndef _LINUX_FUSE_H @@ -68,7 +71,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 14 +#define FUSE_KERNEL_MINOR_VERSION 15 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -260,6 +263,7 @@ enum fuse_notify_code { FUSE_NOTIFY_POLL = 1, FUSE_NOTIFY_INVAL_INODE = 2, FUSE_NOTIFY_INVAL_ENTRY = 3, + FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_CODE_MAX, }; @@ -568,4 +572,11 @@ struct fuse_notify_inval_entry_out { __u32 padding; }; +struct fuse_notify_store_out { + __u64 nodeid; + __u64 offset; + __u32 size; + __u32 padding; +}; + #endif /* _LINUX_FUSE_H */ From 2d45ba381a74a743eeaa2b06c7c5c0d2bf73ba1a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 Jul 2010 14:41:40 +0200 Subject: [PATCH 3/3] fuse: add retrieve request Userspace filesystem can request data to be retrieved from the inode's mapping. This request is synchronous and the retrieved data is queued as a new request. If the write to the fuse device returns an error then the retrieve request was not completed and a reply will not be sent. Only present pages are returned in the retrieve reply. Retrieving stops when it finds a non-present page and only data prior to that is returned. This request doesn't change the dirty state of pages. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 131 ++++++++++++++++++++++++++++++++++++++++++- fs/fuse/fuse_i.h | 1 + include/linux/fuse.h | 21 +++++++ 3 files changed, 152 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8e01c865586e..69ad053ffd78 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -239,7 +239,6 @@ static u64 fuse_get_unique(struct fuse_conn *fc) static void queue_request(struct fuse_conn *fc, struct fuse_req *req) { - req->in.h.unique = fuse_get_unique(fc); req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); list_add_tail(&req->list, &fc->pending); @@ -261,6 +260,7 @@ static void flush_bg_queue(struct fuse_conn *fc) req = list_entry(fc->bg_queue.next, struct fuse_req, list); list_del(&req->list); fc->active_background++; + req->in.h.unique = fuse_get_unique(fc); queue_request(fc, req); } } @@ -398,6 +398,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) else if (fc->conn_error) req->out.h.error = -ECONNREFUSED; else { + req->in.h.unique = fuse_get_unique(fc); queue_request(fc, req); /* acquire extra reference, since request is still needed after request_end() */ @@ -450,6 +451,23 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) } EXPORT_SYMBOL_GPL(fuse_request_send_background); +static int fuse_request_send_notify_reply(struct fuse_conn *fc, + struct fuse_req *req, u64 unique) +{ + int err = -ENODEV; + + req->isreply = 0; + req->in.h.unique = unique; + spin_lock(&fc->lock); + if (fc->connected) { + queue_request(fc, req); + err = 0; + } + spin_unlock(&fc->lock); + + return err; +} + /* * Called under fc->lock * @@ -1316,6 +1334,114 @@ out_finish: return err; } +static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) +{ + int i; + + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + page_cache_release(page); + } +} + +static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, + struct fuse_notify_retrieve_out *outarg) +{ + int err; + struct address_space *mapping = inode->i_mapping; + struct fuse_req *req; + pgoff_t index; + loff_t file_size; + unsigned int num; + unsigned int offset; + size_t total_len; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + offset = outarg->offset & ~PAGE_CACHE_MASK; + + req->in.h.opcode = FUSE_NOTIFY_REPLY; + req->in.h.nodeid = outarg->nodeid; + req->in.numargs = 2; + req->in.argpages = 1; + req->page_offset = offset; + req->end = fuse_retrieve_end; + + index = outarg->offset >> PAGE_CACHE_SHIFT; + file_size = i_size_read(inode); + num = outarg->size; + if (outarg->offset > file_size) + num = 0; + else if (outarg->offset + num > file_size) + num = file_size - outarg->offset; + + while (num) { + struct page *page; + unsigned int this_num; + + page = find_get_page(mapping, index); + if (!page) + break; + + this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); + req->pages[req->num_pages] = page; + req->num_pages++; + + num -= this_num; + total_len += this_num; + } + req->misc.retrieve_in.offset = outarg->offset; + req->misc.retrieve_in.size = total_len; + req->in.args[0].size = sizeof(req->misc.retrieve_in); + req->in.args[0].value = &req->misc.retrieve_in; + req->in.args[1].size = total_len; + + err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); + if (err) + fuse_retrieve_end(fc, req); + + return err; +} + +static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_retrieve_out outarg; + struct inode *inode; + int err; + + err = -EINVAL; + if (size != sizeof(outarg)) + goto copy_finish; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto copy_finish; + + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (fc->sb) { + u64 nodeid = outarg.nodeid; + + inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); + if (inode) { + err = fuse_retrieve(fc, inode, &outarg); + iput(inode); + } + } + up_read(&fc->killsb); + + return err; + +copy_finish: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -1332,6 +1458,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_STORE: return fuse_notify_store(fc, size, cs); + case FUSE_NOTIFY_RETRIEVE: + return fuse_notify_retrieve(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 61267d8d527b..57d4a3a0f102 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -272,6 +272,7 @@ struct fuse_req { struct fuse_write_in in; struct fuse_write_out out; } write; + struct fuse_notify_retrieve_in retrieve_in; struct fuse_lk_in lk_in; } misc; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index a90bd49834aa..c3c578e09833 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -40,6 +40,7 @@ * * 7.15 * - add store notify + * - add retrieve notify */ #ifndef _LINUX_FUSE_H @@ -254,6 +255,7 @@ enum fuse_opcode { FUSE_DESTROY = 38, FUSE_IOCTL = 39, FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -264,6 +266,7 @@ enum fuse_notify_code { FUSE_NOTIFY_INVAL_INODE = 2, FUSE_NOTIFY_INVAL_ENTRY = 3, FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_CODE_MAX, }; @@ -579,4 +582,22 @@ struct fuse_notify_store_out { __u32 padding; }; +struct fuse_notify_retrieve_out { + __u64 notify_unique; + __u64 nodeid; + __u64 offset; + __u32 size; + __u32 padding; +}; + +/* Matches the size of fuse_write_in */ +struct fuse_notify_retrieve_in { + __u64 dummy1; + __u64 offset; + __u32 size; + __u32 dummy2; + __u64 dummy3; + __u64 dummy4; +}; + #endif /* _LINUX_FUSE_H */