Merge tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Add a new knfsd file cache, so that we don't have to open and close
     on each (NFSv2/v3) READ or WRITE. This can speed up read and write
     in some cases. It also replaces our readahead cache.

   - Prevent silent data loss on write errors, by treating write errors
     like server reboots for the purposes of write caching, thus forcing
     clients to resend their writes.

   - Tweak the code that allocates sessions to be more forgiving, so
     that NFSv4.1 mounts are less likely to hang when a server already
     has a lot of clients.

   - Eliminate an arbitrary limit on NFSv4 ACL sizes; they should now be
     limited only by the backend filesystem and the maximum RPC size.

   - Allow the server to enforce use of the correct kerberos credentials
     when a client reclaims state after a reboot.

  And some miscellaneous smaller bugfixes and cleanup"

* tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux: (34 commits)
  sunrpc: clean up indentation issue
  nfsd: fix nfs read eof detection
  nfsd: Make nfsd_reset_boot_verifier_locked static
  nfsd: degraded slot-count more gracefully as allocation nears exhaustion.
  nfsd: handle drc over-allocation gracefully.
  nfsd: add support for upcall version 2
  nfsd: add a "GetVersion" upcall for nfsdcld
  nfsd: Reset the boot verifier on all write I/O errors
  nfsd: Don't garbage collect files that might contain write errors
  nfsd: Support the server resetting the boot verifier
  nfsd: nfsd_file cache entries should be per net namespace
  nfsd: eliminate an unnecessary acl size limit
  Deprecate nfsd fault injection
  nfsd: remove duplicated include from filecache.c
  nfsd: Fix the documentation for svcxdr_tmpalloc()
  nfsd: Fix up some unused variable warnings
  nfsd: close cached files prior to a REMOVE or RENAME that would replace target
  nfsd: rip out the raparms cache
  nfsd: have nfsd_test_lock use the nfsd_file cache
  nfsd: hook up nfs4_preprocess_stateid_op to the nfsd_file cache
  ...
Linus Torvalds 2019-09-27 17:00:27 -07:00
commit 298fb76a55
40 changed files with 2086 additions and 603 deletions

fs/file_table.c

@@ -327,6 +327,7 @@ void flush_delayed_fput(void)
 {
 	delayed_fput(NULL);
 }
+EXPORT_SYMBOL_GPL(flush_delayed_fput);

 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);

fs/locks.c

@@ -212,6 +212,7 @@ struct file_lock_list_struct {
 static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);

 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
  * It is protected by blocked_lock_lock.

@@ -1991,6 +1992,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 }
 EXPORT_SYMBOL(generic_setlease);

+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+static struct srcu_notifier_head lease_notifier_chain;
+
+static inline void
+lease_notifier_chain_init(void)
+{
+	srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+	if (arg != F_UNLCK)
+		srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+	return srcu_notifier_chain_register(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+	srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
 /**
  * vfs_setlease - sets a lease on an open file
  * @filp: file pointer

@@ -2011,6 +2070,8 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
+	if (lease)
+		setlease_notifier(arg, *lease);
 	if (filp->f_op->setlease)
 		return filp->f_op->setlease(filp, arg, lease, priv);
 	else

@@ -2924,6 +2985,7 @@ static int __init filelock_init(void)
 		INIT_HLIST_HEAD(&fll->hlist);
 	}

+	lease_notifier_chain_init();
 	return 0;
 }
 core_initcall(filelock_init);
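For orientation, here is a minimal sketch (not part of the patch) of how a subsystem could hook into the lease notifier chain added above. The module and callback names are hypothetical; nfsd's real consumer is nfsd_file_lease_notifier_call() in the new fs/nfsd/filecache.c further down.

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/printk.h>

/* Called on every F_SETLEASE attempt other than F_UNLCK. */
static int example_lease_cb(struct notifier_block *nb, unsigned long arg,
			    void *data)
{
	struct file_lock *fl = data;

	/* arg is the F_SETLEASE argument; data is the proposed lease */
	if (fl->fl_flags & FL_LEASE)
		pr_debug("lease attempt on inode %lu\n",
			 file_inode(fl->fl_file)->i_ino);
	return 0;
}

static struct notifier_block example_lease_nb = {
	.notifier_call = example_lease_cb,
};

static int __init example_init(void)
{
	return lease_register_notifier(&example_lease_nb);
}

static void __exit example_exit(void)
{
	lease_unregister_notifier(&example_lease_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");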

fs/nfsd/Kconfig

@@ -3,6 +3,7 @@ config NFSD
 	tristate "NFS server support"
 	depends on INET
 	depends on FILE_LOCKING
+	depends on FSNOTIFY
 	select LOCKD
 	select SUNRPC
 	select EXPORTFS

@@ -147,7 +148,7 @@ config NFSD_V4_SECURITY_LABEL
 config NFSD_FAULT_INJECTION
 	bool "NFS server manual fault injection"
-	depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+	depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
 	help
 	  This option enables support for manually injecting faults
 	  into the NFS server. This is intended to be used for

fs/nfsd/Makefile

@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD) += nfsd.o
 nfsd-y += trace.o
 nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
-	   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+	   export.o auth.o lockd.o nfscache.o nfsxdr.o \
+	   stats.o filecache.o
 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o

fs/nfsd/acl.h

@@ -39,14 +39,6 @@ struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;

-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation. This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
-			/ sizeof(struct nfs4_ace))
-
 int nfs4_acl_bytes(int entries);
 int nfs4_acl_get_whotype(char *, u32);
 __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);

fs/nfsd/blocklayout.c

@@ -15,6 +15,7 @@
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
+#include "filecache.h"

 #define NFSDDBG_FACILITY NFSDDBG_PNFS

@@ -404,7 +405,7 @@ static void
 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
 {
 	struct nfs4_client *clp = ls->ls_stid.sc_client;
-	struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+	struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;

 	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
 			nfsd4_scsi_pr_key(clp), 0, true);

fs/nfsd/export.c

@@ -22,6 +22,7 @@
 #include "nfsfh.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"

 #define NFSDDBG_FACILITY NFSDDBG_EXPORT

@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
 		return NULL;
 }

+static void expkey_flush(void)
+{
+	/*
+	 * Take the nfsd_mutex here to ensure that the file cache is not
+	 * destroyed while we're in the middle of flushing.
+	 */
+	mutex_lock(&nfsd_mutex);
+	nfsd_file_cache_purge(current->nsproxy->net_ns);
+	mutex_unlock(&nfsd_mutex);
+}
+
 static const struct cache_detail svc_expkey_cache_template = {
 	.owner = THIS_MODULE,
 	.hash_size = EXPKEY_HASHMAX,

@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
 	.init = expkey_init,
 	.update = expkey_update,
 	.alloc = expkey_alloc,
+	.flush = expkey_flush,
 };

 static int

fs/nfsd/filecache.c (new file, 934 lines, mode 100644)

@@ -0,0 +1,934 @@
/*
* Open file cache.
*
* (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
*/
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FH
/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS 12
#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
#define NFSD_FILE_LRU_RESCAN (0)
#define NFSD_FILE_SHUTDOWN (1)
#define NFSD_FILE_LRU_THRESHOLD (4096UL)
#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
struct nfsd_fcache_bucket {
struct hlist_head nfb_head;
spinlock_t nfb_lock;
unsigned int nfb_count;
unsigned int nfb_maxcount;
};
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
static struct list_lru nfsd_file_lru;
static long nfsd_file_lru_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
enum nfsd_file_laundrette_ctl {
NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
NFSD_FILE_LAUNDRETTE_MAY_FLUSH
};
static void
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
{
long count = atomic_long_read(&nfsd_filecache_count);
if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
return;
/* Be more aggressive about scanning if over the threshold */
if (count > NFSD_FILE_LRU_THRESHOLD)
mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
else
schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
return;
/* ...and don't delay flushing if we're out of control */
if (count >= NFSD_FILE_LRU_LIMIT)
flush_delayed_work(&nfsd_filecache_laundrette);
}
static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
put_cred(nf->nf_cred);
kmem_cache_free(nfsd_file_slab, nf);
}
static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
nfm_mark);
kmem_cache_free(nfsd_file_mark_slab, nfm);
}
static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
if (!atomic_inc_not_zero(&nfm->nfm_ref))
return NULL;
return nfm;
}
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
if (atomic_dec_and_test(&nfm->nfm_ref)) {
fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
fsnotify_put_mark(&nfm->nfm_mark);
}
}
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
int err;
struct fsnotify_mark *mark;
struct nfsd_file_mark *nfm = NULL, *new;
struct inode *inode = nf->nf_inode;
do {
mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
nfsd_file_fsnotify_group);
if (mark) {
nfm = nfsd_file_mark_get(container_of(mark,
struct nfsd_file_mark,
nfm_mark));
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
fsnotify_put_mark(mark);
if (likely(nfm))
break;
} else
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
/* allocate a new nfm */
new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
if (!new)
return NULL;
fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
atomic_set(&new->nfm_ref, 1);
err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
/*
* If the add was successful, then return the object.
* Otherwise, we need to put the reference we hold on the
* nfm_mark. The fsnotify code will take a reference and put
* it on failure, so we can't just free it directly. It's also
* not safe to call fsnotify_destroy_mark on it as the
* mark->group will be NULL. Thus, we can't let the nfm_ref
* counter drive the destruction at this point.
*/
if (likely(!err))
nfm = new;
else
fsnotify_put_mark(&new->nfm_mark);
} while (unlikely(err == -EEXIST));
return nfm;
}
static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
struct net *net)
{
struct nfsd_file *nf;
nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
if (nf) {
INIT_HLIST_NODE(&nf->nf_node);
INIT_LIST_HEAD(&nf->nf_lru);
nf->nf_file = NULL;
nf->nf_cred = get_current_cred();
nf->nf_net = net;
nf->nf_flags = 0;
nf->nf_inode = inode;
nf->nf_hashval = hashval;
atomic_set(&nf->nf_ref, 1);
nf->nf_may = may & NFSD_FILE_MAY_MASK;
if (may & NFSD_MAY_NOT_BREAK_LEASE) {
if (may & NFSD_MAY_WRITE)
__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
if (may & NFSD_MAY_READ)
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
trace_nfsd_file_alloc(nf);
}
return nf;
}
static bool
nfsd_file_free(struct nfsd_file *nf)
{
bool flush = false;
trace_nfsd_file_put_final(nf);
if (nf->nf_mark)
nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) {
get_file(nf->nf_file);
filp_close(nf->nf_file, NULL);
fput(nf->nf_file);
flush = true;
}
call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
return flush;
}
static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
struct file *file = nf->nf_file;
struct address_space *mapping;
if (!file || !(file->f_mode & FMODE_WRITE))
return false;
mapping = file->f_mapping;
return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}
static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
struct file *file = nf->nf_file;
if (!file || !(file->f_mode & FMODE_WRITE))
return 0;
return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}
static bool
nfsd_file_in_use(struct nfsd_file *nf)
{
return nfsd_file_check_writeback(nf) ||
nfsd_file_check_write_error(nf);
}
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
hlist_del_rcu(&nf->nf_node);
if (!list_empty(&nf->nf_lru))
list_lru_del(&nfsd_file_lru, &nf->nf_lru);
atomic_long_dec(&nfsd_filecache_count);
}
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
nfsd_file_do_unhash(nf);
return true;
}
return false;
}
/*
* Return true if the file was unhashed.
*/
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
trace_nfsd_file_unhash_and_release_locked(nf);
if (!nfsd_file_unhash(nf))
return false;
/* keep final reference for nfsd_file_lru_dispose */
if (atomic_add_unless(&nf->nf_ref, -1, 1))
return true;
list_add(&nf->nf_lru, dispose);
return true;
}
static int
nfsd_file_put_noref(struct nfsd_file *nf)
{
int count;
trace_nfsd_file_put(nf);
count = atomic_dec_return(&nf->nf_ref);
if (!count) {
WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
nfsd_file_free(nf);
}
return count;
}
void
nfsd_file_put(struct nfsd_file *nf)
{
bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
bool unused = !nfsd_file_in_use(nf);
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
}
struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
if (likely(atomic_inc_not_zero(&nf->nf_ref)))
return nf;
return NULL;
}
static void
nfsd_file_dispose_list(struct list_head *dispose)
{
struct nfsd_file *nf;
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
list_del(&nf->nf_lru);
nfsd_file_put_noref(nf);
}
}
static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
bool flush = false;
struct nfsd_file *nf;
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
list_del(&nf->nf_lru);
if (!atomic_dec_and_test(&nf->nf_ref))
continue;
if (nfsd_file_free(nf))
flush = true;
}
if (flush)
flush_delayed_fput();
}
/*
* Note this can deadlock with nfsd_file_cache_purge.
*/
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
spinlock_t *lock, void *arg)
__releases(lock)
__acquires(lock)
{
struct list_head *head = arg;
struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
/*
* Do a lockless refcount check. The hashtable holds one reference, so
* we look to see if anything else has a reference, or if any have
* been put since the shrinker last ran. Those don't get unhashed and
* released.
*
* Note that in the put path, we set the flag and then decrement the
* counter. Here we check the counter and then test and clear the flag.
* That order is deliberate to ensure that we can do this locklessly.
*/
if (atomic_read(&nf->nf_ref) > 1)
goto out_skip;
/*
* Don't throw out files that are still undergoing I/O or
* that have uncleared errors pending.
*/
if (nfsd_file_check_writeback(nf))
goto out_skip;
if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
goto out_rescan;
if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
goto out_skip;
list_lru_isolate_move(lru, &nf->nf_lru, head);
return LRU_REMOVED;
out_rescan:
set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
return LRU_SKIP;
}
static void
nfsd_file_lru_dispose(struct list_head *head)
{
while(!list_empty(head)) {
struct nfsd_file *nf = list_first_entry(head,
struct nfsd_file, nf_lru);
list_del_init(&nf->nf_lru);
spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
nfsd_file_do_unhash(nf);
spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
nfsd_file_put_noref(nf);
}
}
static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
return list_lru_count(&nfsd_file_lru);
}
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
LIST_HEAD(head);
unsigned long ret;
ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
nfsd_file_lru_dispose(&head);
return ret;
}
static struct shrinker nfsd_file_shrinker = {
.scan_objects = nfsd_file_lru_scan,
.count_objects = nfsd_file_lru_count,
.seeks = 1,
};
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
struct list_head *dispose)
{
struct nfsd_file *nf;
struct hlist_node *tmp;
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
if (inode == nf->nf_inode)
nfsd_file_unhash_and_release_locked(nf, dispose);
}
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}
/**
* nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
* @inode: inode of the file to attempt to remove
*
* Walk the whole hash bucket, looking for any files that correspond to "inode".
* If any do, then unhash them and put the hashtable reference to them and
* destroy any that had their last reference put. Also ensure that any of the
* fputs also have their final __fput done as well.
*/
void
nfsd_file_close_inode_sync(struct inode *inode)
{
unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
NFSD_FILE_HASH_BITS);
LIST_HEAD(dispose);
__nfsd_file_close_inode(inode, hashval, &dispose);
trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
nfsd_file_dispose_list_sync(&dispose);
}
/**
* nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
* @inode: inode of the file to attempt to remove
*
* Walk the whole hash bucket, looking for any files that correspond to "inode".
* If any do, then unhash them and put the hashtable reference to them and
* destroy any that had their last reference put.
*/
static void
nfsd_file_close_inode(struct inode *inode)
{
unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
NFSD_FILE_HASH_BITS);
LIST_HEAD(dispose);
__nfsd_file_close_inode(inode, hashval, &dispose);
trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
nfsd_file_dispose_list(&dispose);
}
/**
* nfsd_file_delayed_close - close unused nfsd_files
* @work: dummy
*
* Walk the LRU list and close any entries that have not been used since
* the last scan.
*
* Note this can deadlock with nfsd_file_cache_purge.
*/
static void
nfsd_file_delayed_close(struct work_struct *work)
{
LIST_HEAD(head);
list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
if (!list_empty(&head)) {
nfsd_file_lru_dispose(&head);
flush_delayed_fput();
}
}
static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
void *data)
{
struct file_lock *fl = data;
/* Only close files for F_SETLEASE leases */
if (fl->fl_flags & FL_LEASE)
nfsd_file_close_inode_sync(file_inode(fl->fl_file));
return 0;
}
static struct notifier_block nfsd_file_lease_notifier = {
.notifier_call = nfsd_file_lease_notifier_call,
};
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
struct inode *inode,
u32 mask, const void *data, int data_type,
const struct qstr *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info)
{
trace_nfsd_file_fsnotify_handle_event(inode, mask);
/* Should be no marks on non-regular files */
if (!S_ISREG(inode->i_mode)) {
WARN_ON_ONCE(1);
return 0;
}
/* don't close files if this was not the last link */
if (mask & FS_ATTRIB) {
if (inode->i_nlink)
return 0;
}
nfsd_file_close_inode(inode);
return 0;
}
static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
.handle_event = nfsd_file_fsnotify_handle_event,
.free_mark = nfsd_file_mark_free,
};
int
nfsd_file_cache_init(void)
{
int ret = -ENOMEM;
unsigned int i;
clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
if (nfsd_file_hashtbl)
return 0;
nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
if (!nfsd_file_hashtbl) {
pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
goto out_err;
}
nfsd_file_slab = kmem_cache_create("nfsd_file",
sizeof(struct nfsd_file), 0, 0, NULL);
if (!nfsd_file_slab) {
pr_err("nfsd: unable to create nfsd_file_slab\n");
goto out_err;
}
nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
sizeof(struct nfsd_file_mark), 0, 0, NULL);
if (!nfsd_file_mark_slab) {
pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
goto out_err;
}
ret = list_lru_init(&nfsd_file_lru);
if (ret) {
pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
goto out_err;
}
ret = register_shrinker(&nfsd_file_shrinker);
if (ret) {
pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
goto out_lru;
}
ret = lease_register_notifier(&nfsd_file_lease_notifier);
if (ret) {
pr_err("nfsd: unable to register lease notifier: %d\n", ret);
goto out_shrinker;
}
nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
if (IS_ERR(nfsd_file_fsnotify_group)) {
pr_err("nfsd: unable to create fsnotify group: %ld\n",
PTR_ERR(nfsd_file_fsnotify_group));
nfsd_file_fsnotify_group = NULL;
goto out_notifier;
}
for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
}
INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
out:
return ret;
out_notifier:
lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
unregister_shrinker(&nfsd_file_shrinker);
out_lru:
list_lru_destroy(&nfsd_file_lru);
out_err:
kmem_cache_destroy(nfsd_file_slab);
nfsd_file_slab = NULL;
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
kfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
goto out;
}
/*
* Note this can deadlock with nfsd_file_lru_cb.
*/
void
nfsd_file_cache_purge(struct net *net)
{
unsigned int i;
struct nfsd_file *nf;
struct hlist_node *next;
LIST_HEAD(dispose);
bool del;
if (!nfsd_file_hashtbl)
return;
for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
spin_lock(&nfb->nfb_lock);
hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
if (net && nf->nf_net != net)
continue;
del = nfsd_file_unhash_and_release_locked(nf, &dispose);
/*
* Deadlock detected! Something marked this entry as
* unhashed, but hasn't removed it from the hash list.
*/
WARN_ON_ONCE(!del);
}
spin_unlock(&nfb->nfb_lock);
nfsd_file_dispose_list(&dispose);
}
}
void
nfsd_file_cache_shutdown(void)
{
LIST_HEAD(dispose);
set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
lease_unregister_notifier(&nfsd_file_lease_notifier);
unregister_shrinker(&nfsd_file_shrinker);
/*
* make sure all callers of nfsd_file_lru_cb are done before
* calling nfsd_file_cache_purge
*/
cancel_delayed_work_sync(&nfsd_filecache_laundrette);
nfsd_file_cache_purge(NULL);
list_lru_destroy(&nfsd_file_lru);
rcu_barrier();
fsnotify_put_group(nfsd_file_fsnotify_group);
nfsd_file_fsnotify_group = NULL;
kmem_cache_destroy(nfsd_file_slab);
nfsd_file_slab = NULL;
fsnotify_wait_marks_destroyed();
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
kfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
}
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
int i;
if (!uid_eq(c1->fsuid, c2->fsuid))
return false;
if (!gid_eq(c1->fsgid, c2->fsgid))
return false;
if (c1->group_info == NULL || c2->group_info == NULL)
return c1->group_info == c2->group_info;
if (c1->group_info->ngroups != c2->group_info->ngroups)
return false;
for (i = 0; i < c1->group_info->ngroups; i++) {
if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
return false;
}
return true;
}
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
unsigned int hashval, struct net *net)
{
struct nfsd_file *nf;
unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
nf_node) {
if ((need & nf->nf_may) != need)
continue;
if (nf->nf_inode != inode)
continue;
if (nf->nf_net != net)
continue;
if (!nfsd_match_cred(nf->nf_cred, current_cred()))
continue;
if (nfsd_file_get(nf) != NULL)
return nf;
}
return NULL;
}
/**
* nfsd_file_is_cached - are there any cached open files for this fh?
* @inode: inode of the file to check
*
* Scan the hashtable for open files that match this fh. Returns true if there
* are any, and false if not.
*/
bool
nfsd_file_is_cached(struct inode *inode)
{
bool ret = false;
struct nfsd_file *nf;
unsigned int hashval;
hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
rcu_read_lock();
hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
nf_node) {
if (inode == nf->nf_inode) {
ret = true;
break;
}
}
rcu_read_unlock();
trace_nfsd_file_is_cached(inode, hashval, (int)ret);
return ret;
}
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
__be32 status;
struct net *net = SVC_NET(rqstp);
struct nfsd_file *nf, *new;
struct inode *inode;
unsigned int hashval;
/* FIXME: skip this if fh_dentry is already set? */
status = fh_verify(rqstp, fhp, S_IFREG,
may_flags|NFSD_MAY_OWNER_OVERRIDE);
if (status != nfs_ok)
return status;
inode = d_inode(fhp->fh_dentry);
hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
rcu_read_lock();
nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
rcu_read_unlock();
if (nf)
goto wait_for_construction;
new = nfsd_file_alloc(inode, may_flags, hashval, net);
if (!new) {
trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
NULL, nfserr_jukebox);
return nfserr_jukebox;
}
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
if (nf == NULL)
goto open_file;
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
nfsd_file_slab_free(&new->nf_rcu);
wait_for_construction:
wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
/* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
nfsd_file_put_noref(nf);
goto retry;
}
this_cpu_inc(nfsd_file_cache_hits);
if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
bool write = (may_flags & NFSD_MAY_WRITE);
if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
(test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
status = nfserrno(nfsd_open_break_lease(
file_inode(nf->nf_file), may_flags));
if (status == nfs_ok) {
clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
if (write)
clear_bit(NFSD_FILE_BREAK_WRITE,
&nf->nf_flags);
}
}
}
out:
if (status == nfs_ok) {
*pnf = nf;
} else {
nfsd_file_put(nf);
nf = NULL;
}
trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
return status;
open_file:
nf = new;
/* Take reference for the hashtable */
atomic_inc(&nf->nf_ref);
__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
list_lru_add(&nfsd_file_lru, &nf->nf_lru);
hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
++nfsd_file_hashtbl[hashval].nfb_count;
nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
nfsd_file_hashtbl[hashval].nfb_count);
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
atomic_long_inc(&nfsd_filecache_count);
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark)
status = nfsd_open_verified(rqstp, fhp, S_IFREG,
may_flags, &nf->nf_file);
else
status = nfserr_jukebox;
/*
* If construction failed, or we raced with a call to unlink()
* then unhash.
*/
if (status != nfs_ok || inode->i_nlink == 0) {
bool do_free;
spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
do_free = nfsd_file_unhash(nf);
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
if (do_free)
nfsd_file_put_noref(nf);
}
clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
smp_mb__after_atomic();
wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
goto out;
}
/*
* Note that fields may be added, removed or reordered in the future. Programs
* scraping this file for info should test the labels to ensure they're
* getting the correct field.
*/
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
unsigned int i, count = 0, longest = 0;
unsigned long hits = 0;
/*
* No need for spinlocks here since we're not terribly interested in
* accuracy. We do take the nfsd_mutex simply to ensure that we
* don't end up racing with server shutdown
*/
mutex_lock(&nfsd_mutex);
if (nfsd_file_hashtbl) {
for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
count += nfsd_file_hashtbl[i].nfb_count;
longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
}
}
mutex_unlock(&nfsd_mutex);
for_each_possible_cpu(i)
hits += per_cpu(nfsd_file_cache_hits, i);
seq_printf(m, "total entries: %u\n", count);
seq_printf(m, "longest chain: %u\n", longest);
seq_printf(m, "cache hits: %lu\n", hits);
return 0;
}
int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, nfsd_file_cache_stats_show, NULL);
}

fs/nfsd/filecache.h (new file, 61 lines)

@@ -0,0 +1,61 @@
#ifndef _FS_NFSD_FILECACHE_H
#define _FS_NFSD_FILECACHE_H
#include <linux/fsnotify_backend.h>
/*
* This is the fsnotify_mark container that nfsd attaches to the files that it
* is holding open. Note that we have a separate refcount here aside from the
* one in the fsnotify_mark. We only want a single fsnotify_mark attached to
* the inode, and for each nfsd_file to hold a reference to it.
*
* The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
* how to put that reference. If there are still outstanding nfsd_files that
* reference the mark, then we would want to call fsnotify_put_mark on it.
* If there were not, then we'd need to call fsnotify_destroy_mark. Since we
* can't really tell the difference, we use the nfm_mark to keep track of how
* many nfsd_files hold references to the mark. When that counter goes to zero
* then we know to call fsnotify_destroy_mark on it.
*/
struct nfsd_file_mark {
struct fsnotify_mark nfm_mark;
atomic_t nfm_ref;
};
/*
* A representation of a file that has been opened by knfsd. These are hashed
* in the hashtable by inode pointer value. Note that this object doesn't
* hold a reference to the inode by itself, so the nf_inode pointer should
* never be dereferenced, only used for comparison.
*/
struct nfsd_file {
struct hlist_node nf_node;
struct list_head nf_lru;
struct rcu_head nf_rcu;
struct file *nf_file;
const struct cred *nf_cred;
struct net *nf_net;
#define NFSD_FILE_HASHED (0)
#define NFSD_FILE_PENDING (1)
#define NFSD_FILE_BREAK_READ (2)
#define NFSD_FILE_BREAK_WRITE (3)
#define NFSD_FILE_REFERENCED (4)
unsigned long nf_flags;
struct inode *nf_inode;
unsigned int nf_hashval;
atomic_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
};
int nfsd_file_cache_init(void);
void nfsd_file_cache_purge(struct net *);
void nfsd_file_cache_shutdown(void);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
int nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */
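A usage sketch (illustrative only, not part of the patch): the calling pattern this API is meant for. The helper name below is hypothetical; the real callers added in this series, for example nfsd4_write() in fs/nfsd/nfs4proc.c, follow the same acquire, use nf->nf_file, put shape, so the struct file stays open in the cache between READ/WRITE calls.

static __be32 example_do_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
			       loff_t offset, struct kvec *vec, int vlen,
			       unsigned long *cnt, int stable)
{
	struct nfsd_file *nf;
	__be32 status;

	/* Look up (or open and hash) a cached file for this filehandle */
	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
	if (status != nfs_ok)
		return status;

	/* Do the I/O through the cached struct file */
	status = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset,
				vec, vlen, cnt, stable);

	/* Drop our reference; the open file stays cached for the next call */
	nfsd_file_put(nf);
	return status;
}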

fs/nfsd/netns.h

@@ -104,6 +104,7 @@ struct nfsd_net {
 	/* Time of server startup */
 	struct timespec64 nfssvc_boot;
+	seqlock_t boot_lock;

 	/*
 	 * Max number of connections this nfsd container will allow. Defaults

@@ -179,4 +180,7 @@ struct nfsd_net {
 extern void nfsd_netns_free_versions(struct nfsd_net *nn);

 extern unsigned int nfsd_net_id;
+
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
 #endif /* __NFSD_NETNS_H__ */

fs/nfsd/nfs3proc.c

@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
 	nfserr = nfsd_read(rqstp, &resp->fh,
 				  argp->offset,
 				  rqstp->rq_vec, argp->vlen,
-				  &resp->count);
-	if (nfserr == 0) {
-		struct inode *inode = d_inode(resp->fh.fh_dentry);
-		resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
-					inode->i_size);
-	}
+				  &resp->count,
+				  &resp->eof);
 	RETURN_STATUS(nfserr);
 }

fs/nfsd/nfs3xdr.c

@@ -27,6 +27,7 @@ static u32 nfs3_ftypes[] = {
 	NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
 };

 /*
  * XDR functions for basic NFS types
  */

@@ -751,14 +752,16 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	__be32 verf[2];

 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
 		*p++ = htonl(resp->committed);
 		/* unique identifier, y2038 overflow can be ignored */
-		*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-		*p++ = htonl(nn->nfssvc_boot.tv_nsec);
+		nfsd_copy_boot_verifier(verf, nn);
+		*p++ = verf[0];
+		*p++ = verf[1];
 	}
 	return xdr_ressize_check(rqstp, p);
 }

@@ -1125,13 +1128,15 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	__be32 verf[2];

 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	/* Write verifier */
 	if (resp->status == 0) {
 		/* unique identifier, y2038 overflow can be ignored */
-		*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-		*p++ = htonl(nn->nfssvc_boot.tv_nsec);
+		nfsd_copy_boot_verifier(verf, nn);
+		*p++ = verf[0];
+		*p++ = verf[1];
 	}
 	return xdr_ressize_check(rqstp, p);
 }

fs/nfsd/nfs4callback.c

@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 	if (unlikely(status))
 		return status;
-	if (cb != NULL) {
-		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status || cb->cb_seq_status))
-			return status;
-	}
+	status = decode_cb_sequence4res(xdr, cb);
+	if (unlikely(status || cb->cb_seq_status))
+		return status;
 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }

@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 	if (unlikely(status))
 		return status;
-	if (cb) {
-		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status || cb->cb_seq_status))
-			return status;
-	}
+	status = decode_cb_sequence4res(xdr, cb);
+	if (unlikely(status || cb->cb_seq_status))
+		return status;
 	return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */

@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 	if (unlikely(status))
 		return status;
-	if (cb) {
-		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status || cb->cb_seq_status))
-			return status;
-	}
+	status = decode_cb_sequence4res(xdr, cb);
+	if (unlikely(status || cb->cb_seq_status))
+		return status;
 	return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
 }

@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
 	if (unlikely(status))
 		return status;
-	if (cb) {
-		status = decode_cb_sequence4res(xdr, cb);
-		if (unlikely(status || cb->cb_seq_status))
-			return status;
-	}
+	status = decode_cb_sequence4res(xdr, cb);
+	if (unlikely(status || cb->cb_seq_status))
+		return status;
 	return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
 }
 /*

fs/nfsd/nfs4layouts.c

@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
 	spin_unlock(&fp->fi_lock);

 	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
-		vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
-	fput(ls->ls_file);
+		vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+	nfsd_file_put(ls->ls_file);

 	if (ls->ls_recalled)
 		atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);

@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
 	fl->fl_end = OFFSET_MAX;
 	fl->fl_owner = ls;
 	fl->fl_pid = current->tgid;
-	fl->fl_file = ls->ls_file;
+	fl->fl_file = ls->ls_file->nf_file;

 	status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
 	if (status) {

@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
 			NFSPROC4_CLNT_CB_LAYOUT);

 	if (parent->sc_type == NFS4_DELEG_STID)
-		ls->ls_file = get_file(fp->fi_deleg_file);
+		ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
 	else
 		ls->ls_file = find_any_file(fp);
 	BUG_ON(!ls->ls_file);

 	if (nfsd4_layout_setlease(ls)) {
-		fput(ls->ls_file);
+		nfsd_file_put(ls->ls_file);
 		put_nfs4_file(fp);
 		kmem_cache_free(nfs4_layout_stateid_cache, ls);
 		return NULL;

@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 	argv[0] = (char *)nfsd_recall_failed;
 	argv[1] = addr_str;
-	argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+	argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
 	argv[3] = NULL;

 	error = call_usermodehelper(nfsd_recall_failed, argv, envp,

fs/nfsd/nfs4proc.c

@@ -568,17 +568,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 {
-	__be32 verf[2];
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
-	/*
-	 * This is opaque to client, so no need to byte-swap. Use
-	 * __force to keep sparse happy. y2038 time_t overflow is
-	 * irrelevant in this usage.
-	 */
-	verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-	verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
-	memcpy(verifier->data, verf, sizeof(verifier->data));
+	__be32 *verf = (__be32 *)verifier->data;
+
+	BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+	nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
 }

 static __be32

@@ -761,7 +755,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfsd4_read *read = &u->read;
 	__be32 status;

-	read->rd_filp = NULL;
+	read->rd_nf = NULL;
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;

@@ -782,7 +776,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* check stateid */
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
-					&read->rd_filp, &read->rd_tmp_file);
+					&read->rd_nf);
 	if (status) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;

@@ -798,8 +792,8 @@ out:
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
-	if (u->read.rd_filp)
-		fput(u->read.rd_filp);
+	if (u->read.rd_nf)
+		nfsd_file_put(u->read.rd_nf);
 	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
 			u->read.rd_offset, u->read.rd_length);
 }

@@ -954,7 +948,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
 				&cstate->current_fh, &setattr->sa_stateid,
-				WR_STATE, NULL, NULL);
+				WR_STATE, NULL);
 		if (status) {
 			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
 			return status;

@@ -993,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfsd4_write *write = &u->write;
 	stateid_t *stateid = &write->wr_stateid;
-	struct file *filp = NULL;
+	struct nfsd_file *nf = NULL;
 	__be32 status = nfs_ok;
 	unsigned long cnt;
 	int nvecs;

@@ -1005,7 +999,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	trace_nfsd_write_start(rqstp, &cstate->current_fh,
 			write->wr_offset, cnt);
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-			stateid, WR_STATE, &filp, NULL);
+			stateid, WR_STATE, &nf);
 	if (status) {
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		return status;

@@ -1018,10 +1012,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			&write->wr_head, write->wr_buflen);
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));

-	status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+	status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
 				write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
 				write->wr_how_written);
-	fput(filp);
+	nfsd_file_put(nf);

 	write->wr_bytes_written = cnt;
 	trace_nfsd_write_done(rqstp, &cstate->current_fh,

@@ -1031,8 +1025,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		stateid_t *src_stateid, struct file **src,
-		stateid_t *dst_stateid, struct file **dst)
+		stateid_t *src_stateid, struct nfsd_file **src,
+		stateid_t *dst_stateid, struct nfsd_file **dst)
 {
 	__be32 status;

@@ -1040,22 +1034,22 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_nofilehandle;

 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-			src_stateid, RD_STATE, src, NULL);
+			src_stateid, RD_STATE, src);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
 		goto out;
 	}
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-			dst_stateid, WR_STATE, dst, NULL);
+			dst_stateid, WR_STATE, dst);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
 	}

 	/* fix up for NFS-specific error code */
-	if (!S_ISREG(file_inode(*src)->i_mode) ||
-	    !S_ISREG(file_inode(*dst)->i_mode)) {
+	if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+	    !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
 		status = nfserr_wrong_type;
 		goto out_put_dst;
 	}

@@ -1063,9 +1057,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
 	return status;
 out_put_dst:
-	fput(*dst);
+	nfsd_file_put(*dst);
 out_put_src:
-	fput(*src);
+	nfsd_file_put(*src);
 	goto out;
 }

@@ -1074,7 +1068,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		union nfsd4_op_u *u)
 {
 	struct nfsd4_clone *clone = &u->clone;
-	struct file *src, *dst;
+	struct nfsd_file *src, *dst;
 	__be32 status;

 	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,

@@ -1082,11 +1076,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;

-	status = nfsd4_clone_file_range(src, clone->cl_src_pos,
-			dst, clone->cl_dst_pos, clone->cl_count);
+	status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
+			dst->nf_file, clone->cl_dst_pos, clone->cl_count);

-	fput(dst);
-	fput(src);
+	nfsd_file_put(dst);
+	nfsd_file_put(src);
 out:
 	return status;
 }

@@ -1176,8 +1170,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
 	do {
 		if (kthread_should_stop())
 			break;
-		bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
-				copy->file_dst, dst_pos, bytes_total);
+		bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+				src_pos, copy->nf_dst->nf_file, dst_pos,
+				bytes_total);
 		if (bytes_copied <= 0)
 			break;
 		bytes_total -= bytes_copied;

@@ -1204,8 +1199,8 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
 		status = nfs_ok;
 	}

-	fput(copy->file_src);
-	fput(copy->file_dst);
+	nfsd_file_put(copy->nf_src);
+	nfsd_file_put(copy->nf_dst);
 	return status;
 }

@@ -1218,16 +1213,16 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 	memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
 	memcpy(&dst->fh, &src->fh, sizeof(src->fh));
 	dst->cp_clp = src->cp_clp;
-	dst->file_dst = get_file(src->file_dst);
-	dst->file_src = get_file(src->file_src);
+	dst->nf_dst = nfsd_file_get(src->nf_dst);
+	dst->nf_src = nfsd_file_get(src->nf_src);
 	memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
 }

 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
 	nfs4_free_cp_state(copy);
-	fput(copy->file_dst);
-	fput(copy->file_src);
+	nfsd_file_put(copy->nf_dst);
+	nfsd_file_put(copy->nf_src);
 	spin_lock(&copy->cp_clp->async_lock);
 	list_del(&copy->copies);
 	spin_unlock(&copy->cp_clp->async_lock);

@@ -1264,8 +1259,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfsd4_copy *async_copy = NULL;

 	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
-				&copy->file_src, &copy->cp_dst_stateid,
-				&copy->file_dst);
+				&copy->nf_src, &copy->cp_dst_stateid,
+				&copy->nf_dst);
 	if (status)
 		goto out;

@@ -1347,21 +1342,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		struct nfsd4_fallocate *fallocate, int flags)
 {
 	__be32 status;
-	struct file *file;
+	struct nfsd_file *nf;

 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&fallocate->falloc_stateid,
-					WR_STATE, &file, NULL);
+					WR_STATE, &nf);
 	if (status != nfs_ok) {
 		dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
 		return status;
 	}

-	status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+	status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
 				fallocate->falloc_offset,
 				fallocate->falloc_length,
 				flags);
-	fput(file);
+	nfsd_file_put(nf);
 	return status;
 }

 static __be32

@@ -1406,11 +1401,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfsd4_seek *seek = &u->seek;
 	int whence;
 	__be32 status;
-	struct file *file;
+	struct nfsd_file *nf;

 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&seek->seek_stateid,
-					RD_STATE, &file, NULL);
+					RD_STATE, &nf);
 	if (status) {
 		dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
 		return status;

@@ -1432,14 +1427,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * Note: This call does change file->f_pos, but nothing in NFSD
 	 * should ever file->f_pos.
 	 */
-	seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+	seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
 	if (seek->seek_pos < 0)
 		status = nfserrno(seek->seek_pos);
-	else if (seek->seek_pos >= i_size_read(file_inode(file)))
+	else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
 		seek->seek_eof = true;

 out:
-	fput(file);
+	nfsd_file_put(nf);
 	return status;
 }

View File

@@ -59,8 +59,13 @@ struct nfsd4_client_tracking_ops {
 	void (*remove)(struct nfs4_client *);
 	int (*check)(struct nfs4_client *);
 	void (*grace_done)(struct nfsd_net *);
+	uint8_t version;
+	size_t msglen;
 };
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
@@ -173,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
 		const char *dname, int len, struct nfsd_net *nn)
 {
 	struct xdr_netobj name;
+	struct xdr_netobj princhash = { .len = 0, .data = NULL };
 	struct nfs4_client_reclaim *crp;
 	name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -182,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
 		return;
 	}
 	name.len = len;
-	crp = nfs4_client_to_reclaim(name, nn);
+	crp = nfs4_client_to_reclaim(name, princhash, nn);
 	if (!crp) {
 		kfree(name.data);
 		return;
@@ -482,6 +488,7 @@ static int
 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	struct xdr_netobj name;
+	struct xdr_netobj princhash = { .len = 0, .data = NULL };
 	if (child->d_name.len != HEXDIR_LEN - 1) {
 		printk("%s: illegal name %pd in recovery directory\n",
@@ -496,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 		goto out;
 	}
 	name.len = HEXDIR_LEN;
-	if (!nfs4_client_to_reclaim(name, nn))
+	if (!nfs4_client_to_reclaim(name, princhash, nn))
 		kfree(name.data);
 out:
 	return 0;
@@ -718,6 +725,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
 	.remove = nfsd4_remove_clid_dir,
 	.check = nfsd4_check_legacy_client,
 	.grace_done = nfsd4_recdir_purge_old,
+	.version = 1,
+	.msglen = 0,
 };
/* Globals */ /* Globals */
@@ -731,25 +740,32 @@ struct cld_net {
 	struct list_head cn_list;
 	unsigned int cn_xid;
 	bool cn_has_legacy;
+	struct crypto_shash *cn_tfm;
 };
 struct cld_upcall {
 	struct list_head cu_list;
 	struct cld_net *cu_net;
 	struct completion cu_done;
-	struct cld_msg cu_msg;
+	union {
+		struct cld_msg_hdr cu_hdr;
+		struct cld_msg cu_msg;
+		struct cld_msg_v2 cu_msg_v2;
+	} cu_u;
 };
 static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
 	int ret;
 	struct rpc_pipe_msg msg;
-	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+	struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+					  nfsd_net_id);
 	memset(&msg, 0, sizeof(msg));
 	msg.data = cmsg;
-	msg.len = sizeof(*cmsg);
+	msg.len = nn->client_tracking_ops->msglen;
 	ret = rpc_queue_upcall(pipe, &msg);
 	if (ret < 0) {
@@ -765,7 +781,7 @@ out:
 }
 static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
 	int ret;
@@ -781,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
 }
 static ssize_t
-__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 		struct nfsd_net *nn)
 {
-	uint8_t cmd;
-	struct xdr_netobj name;
+	uint8_t cmd, princhashlen;
+	struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
 	uint16_t namelen;
 	struct cld_net *cn = nn->cld_net;
@@ -794,22 +810,48 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
 		return -EFAULT;
 	}
 	if (cmd == Cld_GraceStart) {
-		if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
-			return -EFAULT;
-		name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
-		if (IS_ERR_OR_NULL(name.data))
-			return -EFAULT;
-		name.len = namelen;
+		if (nn->client_tracking_ops->version >= 2) {
+			const struct cld_clntinfo __user *ci;
+			ci = &cmsg->cm_u.cm_clntinfo;
+			if (get_user(namelen, &ci->cc_name.cn_len))
+				return -EFAULT;
name.data = memdup_user(&ci->cc_name.cn_id, namelen);
if (IS_ERR_OR_NULL(name.data))
return -EFAULT;
name.len = namelen;
get_user(princhashlen, &ci->cc_princhash.cp_len);
if (princhashlen > 0) {
princhash.data = memdup_user(
&ci->cc_princhash.cp_data,
princhashlen);
if (IS_ERR_OR_NULL(princhash.data))
return -EFAULT;
princhash.len = princhashlen;
} else
princhash.len = 0;
} else {
const struct cld_name __user *cnm;
cnm = &cmsg->cm_u.cm_name;
if (get_user(namelen, &cnm->cn_len))
return -EFAULT;
name.data = memdup_user(&cnm->cn_id, namelen);
if (IS_ERR_OR_NULL(name.data))
return -EFAULT;
name.len = namelen;
}
 	if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
 		name.len = name.len - 5;
 		memmove(name.data, name.data + 5, name.len);
 		cn->cn_has_legacy = true;
 	}
-	if (!nfs4_client_to_reclaim(name, nn)) {
+	if (!nfs4_client_to_reclaim(name, princhash, nn)) {
 		kfree(name.data);
+		kfree(princhash.data);
 		return -EFAULT;
 	}
-	return sizeof(*cmsg);
+	return nn->client_tracking_ops->msglen;
 	}
 	return -EFAULT;
 }
@@ -818,21 +860,22 @@ static ssize_t
 cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
 	struct cld_upcall *tmp, *cup;
-	struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+	struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+	struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
 	uint32_t xid;
 	struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
 					  nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 	int16_t status;
-	if (mlen != sizeof(*cmsg)) {
+	if (mlen != nn->client_tracking_ops->msglen) {
 		dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
-			sizeof(*cmsg));
+			nn->client_tracking_ops->msglen);
 		return -EINVAL;
 	}
 	/* copy just the xid so we can try to find that */
-	if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+	if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
 		dprintk("%s: error when copying xid from userspace", __func__);
 		return -EFAULT;
 	}
@@ -842,7 +885,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
	 * list (for -EINPROGRESS, we just want to make sure the xid is
	 * valid, not remove the upcall from the list)
	 */
-	if (get_user(status, &cmsg->cm_status)) {
+	if (get_user(status, &hdr->cm_status)) {
 		dprintk("%s: error when copying status from userspace", __func__);
 		return -EFAULT;
 	}
@@ -851,7 +894,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	cup = NULL;
 	spin_lock(&cn->cn_lock);
 	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-		if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+		if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
 			cup = tmp;
 			if (status != -EINPROGRESS)
 				list_del_init(&cup->cu_list);
@@ -869,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	if (status == -EINPROGRESS)
 		return __cld_pipe_inprogress_downcall(cmsg, nn);
-	if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+	if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
 		return -EFAULT;
 	complete(&cup->cu_done);
@@ -881,7 +924,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
 	struct cld_msg *cmsg = msg->data;
 	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
-						 cu_msg);
+						 cu_u.cu_msg);
 	/* errno >= 0 means we got a downcall */
 	if (msg->errno >= 0)
@@ -1007,14 +1050,17 @@ nfsd4_remove_cld_pipe(struct net *net)
 	nfsd4_cld_unregister_net(net, cn->cn_pipe);
 	rpc_destroy_pipe_data(cn->cn_pipe);
+	if (cn->cn_tfm)
+		crypto_free_shash(cn->cn_tfm);
 	kfree(nn->cld_net);
 	nn->cld_net = NULL;
 }
 static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
 {
 	struct cld_upcall *new, *tmp;
+	struct cld_net *cn = nn->cld_net;
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
@@ -1024,20 +1070,20 @@ alloc_cld_upcall(struct cld_net *cn)
 restart_search:
 	spin_lock(&cn->cn_lock);
 	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-		if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+		if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
 			cn->cn_xid++;
 			spin_unlock(&cn->cn_lock);
 			goto restart_search;
 		}
 	}
 	init_completion(&new->cu_done);
-	new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
-	put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+	new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+	put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
 	new->cu_net = cn;
 	list_add(&new->cu_list, &cn->cn_list);
 	spin_unlock(&cn->cn_lock);
-	dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+	dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
 	return new;
 }
@@ -1066,20 +1112,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	cup->cu_msg.cm_cmd = Cld_Create;
-	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+	cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
 			clp->cl_name.len);
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret) {
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 	}
@@ -1090,6 +1136,75 @@ out_err:
 		"record on stable storage: %d\n", ret);
 }
/* Ask daemon to create a new record */
static void
nfsd4_cld_create_v2(struct nfs4_client *clp)
{
int ret;
struct cld_upcall *cup;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct cld_net *cn = nn->cld_net;
struct cld_msg_v2 *cmsg;
struct crypto_shash *tfm = cn->cn_tfm;
struct xdr_netobj cksum;
char *principal = NULL;
SHASH_DESC_ON_STACK(desc, tfm);
/* Don't upcall if it's already stored */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
cmsg = &cup->cu_u.cu_msg_v2;
cmsg->cm_cmd = Cld_Create;
cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
if (clp->cl_cred.cr_raw_principal)
principal = clp->cl_cred.cr_raw_principal;
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal) {
desc->tfm = tfm;
cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL) {
ret = -ENOMEM;
goto out;
}
ret = crypto_shash_digest(desc, principal, strlen(principal),
cksum.data);
shash_desc_zero(desc);
if (ret) {
kfree(cksum.data);
goto out;
}
cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
cksum.data, cksum.len);
kfree(cksum.data);
} else
cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
if (!ret) {
ret = cmsg->cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
out:
free_cld_upcall(cup);
out_err:
if (ret)
pr_err("NFSD: Unable to create client record on stable storage: %d\n",
ret);
}
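The create_v2 path above reduces to a single one-shot digest of the client's kerberos principal with the kernel shash API. A minimal sketch of just that pattern (assuming <crypto/hash.h> and a sha256 tfm allocated as in nfsd4_cld_tracking_init() further down; the helper name is invented for illustration):

    /* Hash a NUL-terminated principal into out[], which must hold
     * crypto_shash_digestsize(tfm) bytes; returns 0 or a -errno. */
    static int principal_digest(struct crypto_shash *tfm,
                                const char *principal, u8 *out)
    {
            int ret;
            SHASH_DESC_ON_STACK(desc, tfm);

            desc->tfm = tfm;
            /* init + update + final in one call */
            ret = crypto_shash_digest(desc, principal, strlen(principal), out);
            shash_desc_zero(desc);
            return ret;
    }

The same digest is recomputed in nfsd4_cld_check_v2() below and compared against the hash recorded by nfsdcld, so a post-reboot reclaim only succeeds when the kerberos credentials match.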
 /* Ask daemon to create a new record */
 static void
 nfsd4_cld_remove(struct nfs4_client *clp)
@@ -1103,20 +1218,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
 	if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	cup->cu_msg.cm_cmd = Cld_Remove;
-	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+	cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
 			clp->cl_name.len);
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret) {
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 		clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 	}
@@ -1145,21 +1260,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return 0;
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		printk(KERN_ERR "NFSD: Unable to check client record on "
 				"stable storage: %d\n", -ENOMEM);
 		return -ENOMEM;
 	}
-	cup->cu_msg.cm_cmd = Cld_Check;
-	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+	cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
 			clp->cl_name.len);
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret) {
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 	}
@@ -1216,6 +1331,79 @@ found:
 	return 0;
 }
static int
nfsd4_cld_check_v2(struct nfs4_client *clp)
{
struct nfs4_client_reclaim *crp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct cld_net *cn = nn->cld_net;
int status;
char dname[HEXDIR_LEN];
struct xdr_netobj name;
struct crypto_shash *tfm = cn->cn_tfm;
struct xdr_netobj cksum;
char *principal = NULL;
SHASH_DESC_ON_STACK(desc, tfm);
/* did we already find that this client is stable? */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
/* look for it in the reclaim hashtable otherwise */
crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
if (crp)
goto found;
if (cn->cn_has_legacy) {
status = nfs4_make_rec_clidname(dname, &clp->cl_name);
if (status)
return -ENOENT;
name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
if (!name.data) {
dprintk("%s: failed to allocate memory for name.data\n",
__func__);
return -ENOENT;
}
name.len = HEXDIR_LEN;
crp = nfsd4_find_reclaim_client(name, nn);
kfree(name.data);
if (crp)
goto found;
}
return -ENOENT;
found:
if (crp->cr_princhash.len) {
if (clp->cl_cred.cr_raw_principal)
principal = clp->cl_cred.cr_raw_principal;
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal == NULL)
return -ENOENT;
desc->tfm = tfm;
cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL)
return -ENOENT;
status = crypto_shash_digest(desc, principal, strlen(principal),
cksum.data);
shash_desc_zero(desc);
if (status) {
kfree(cksum.data);
return -ENOENT;
}
if (memcmp(crp->cr_princhash.data, cksum.data,
crp->cr_princhash.len)) {
kfree(cksum.data);
return -ENOENT;
}
kfree(cksum.data);
}
crp->cr_clp = clp;
return 0;
}
 static int
 nfsd4_cld_grace_start(struct nfsd_net *nn)
 {
@@ -1223,16 +1411,16 @@ nfsd4_cld_grace_start(struct nfsd_net *nn)
 	struct cld_upcall *cup;
 	struct cld_net *cn = nn->cld_net;
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	cup->cu_msg.cm_cmd = Cld_GraceStart;
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 	free_cld_upcall(cup);
 out_err:
@@ -1250,17 +1438,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
 	struct cld_upcall *cup;
 	struct cld_net *cn = nn->cld_net;
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	cup->cu_msg.cm_cmd = Cld_GraceDone;
-	cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+	cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 	free_cld_upcall(cup);
 out_err:
@@ -1279,16 +1467,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
 	struct cld_upcall *cup;
 	struct cld_net *cn = nn->cld_net;
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
-	cup->cu_msg.cm_cmd = Cld_GraceDone;
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 	free_cld_upcall(cup);
 out_err:
@@ -1336,6 +1524,53 @@ cld_running(struct nfsd_net *nn)
 	return pipe->nreaders || pipe->nwriters;
 }
static int
nfsd4_cld_get_version(struct nfsd_net *nn)
{
int ret = 0;
struct cld_upcall *cup;
struct cld_net *cn = nn->cld_net;
uint8_t version;
cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
ret = cup->cu_u.cu_msg.cm_status;
if (ret)
goto out_free;
version = cup->cu_u.cu_msg.cm_u.cm_version;
dprintk("%s: userspace returned version %u\n",
__func__, version);
if (version < 1)
version = 1;
else if (version > CLD_UPCALL_VERSION)
version = CLD_UPCALL_VERSION;
switch (version) {
case 1:
nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
break;
case 2:
nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
break;
default:
break;
}
}
out_free:
free_cld_upcall(cup);
out_err:
if (ret)
dprintk("%s: Unable to get version from userspace: %d\n",
__func__, ret);
return ret;
}
 static int
 nfsd4_cld_tracking_init(struct net *net)
 {
@@ -1351,6 +1586,11 @@ nfsd4_cld_tracking_init(struct net *net)
 	status = __nfsd4_init_cld_pipe(net);
 	if (status)
 		goto err_shutdown;
+	nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
+	if (IS_ERR(nn->cld_net->cn_tfm)) {
+		status = PTR_ERR(nn->cld_net->cn_tfm);
+		goto err_remove;
+	}
	/*
	 * rpc pipe upcalls take 30 seconds to time out, so we don't want to
@@ -1368,10 +1608,14 @@ nfsd4_cld_tracking_init(struct net *net)
 		goto err_remove;
 	}
+	status = nfsd4_cld_get_version(nn);
+	if (status == -EOPNOTSUPP)
+		pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
 	status = nfsd4_cld_grace_start(nn);
 	if (status) {
 		if (status == -EOPNOTSUPP)
-			printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+			pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
 		nfs4_release_reclaim(nn);
 		goto err_remove;
 	} else
@@ -1403,6 +1647,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
 	.remove = nfsd4_cld_remove,
 	.check = nfsd4_cld_check_v0,
 	.grace_done = nfsd4_cld_grace_done_v0,
+	.version = 1,
+	.msglen = sizeof(struct cld_msg),
 };
 /* For newer nfsdcld's */
@@ -1413,6 +1659,20 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
 	.remove = nfsd4_cld_remove,
 	.check = nfsd4_cld_check,
 	.grace_done = nfsd4_cld_grace_done,
+	.version = 1,
+	.msglen = sizeof(struct cld_msg),
+};
/* v2 create/check ops include the principal, if available */
static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
.init = nfsd4_cld_tracking_init,
.exit = nfsd4_cld_tracking_exit,
.create = nfsd4_cld_create_v2,
.remove = nfsd4_cld_remove,
.check = nfsd4_cld_check_v2,
.grace_done = nfsd4_cld_grace_done,
.version = 2,
.msglen = sizeof(struct cld_msg_v2),
 };
 /* upcall via usermodehelper */
@@ -1760,6 +2020,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
 	.remove = nfsd4_umh_cltrack_remove,
 	.check = nfsd4_umh_cltrack_check,
 	.grace_done = nfsd4_umh_cltrack_grace_done,
+	.version = 1,
+	.msglen = 0,
 };
 int


@@ -50,6 +50,7 @@
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 #define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -429,18 +430,18 @@ put_nfs4_file(struct nfs4_file *fi)
 	}
 }
-static struct file *
+static struct nfsd_file *
 __nfs4_get_fd(struct nfs4_file *f, int oflag)
 {
 	if (f->fi_fds[oflag])
-		return get_file(f->fi_fds[oflag]);
+		return nfsd_file_get(f->fi_fds[oflag]);
 	return NULL;
 }
-static struct file *
+static struct nfsd_file *
 find_writeable_file_locked(struct nfs4_file *f)
 {
-	struct file *ret;
+	struct nfsd_file *ret;
 	lockdep_assert_held(&f->fi_lock);
@@ -450,10 +451,10 @@ find_writeable_file_locked(struct nfs4_file *f)
 	return ret;
 }
-static struct file *
+static struct nfsd_file *
 find_writeable_file(struct nfs4_file *f)
 {
-	struct file *ret;
+	struct nfsd_file *ret;
 	spin_lock(&f->fi_lock);
 	ret = find_writeable_file_locked(f);
@@ -462,9 +463,10 @@ find_writeable_file(struct nfs4_file *f)
 	return ret;
 }
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
 {
-	struct file *ret;
+	struct nfsd_file *ret;
 	lockdep_assert_held(&f->fi_lock);
@@ -474,10 +476,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
 	return ret;
 }
-static struct file *
+static struct nfsd_file *
 find_readable_file(struct nfs4_file *f)
 {
-	struct file *ret;
+	struct nfsd_file *ret;
 	spin_lock(&f->fi_lock);
 	ret = find_readable_file_locked(f);
@@ -486,10 +488,10 @@ find_readable_file(struct nfs4_file *f)
 	return ret;
 }
-struct file *
+struct nfsd_file *
 find_any_file(struct nfs4_file *f)
 {
-	struct file *ret;
+	struct nfsd_file *ret;
 	spin_lock(&f->fi_lock);
 	ret = __nfs4_get_fd(f, O_RDWR);
@@ -590,17 +592,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
 	might_lock(&fp->fi_lock);
 	if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
-		struct file *f1 = NULL;
-		struct file *f2 = NULL;
+		struct nfsd_file *f1 = NULL;
+		struct nfsd_file *f2 = NULL;
 		swap(f1, fp->fi_fds[oflag]);
 		if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
 			swap(f2, fp->fi_fds[O_RDWR]);
 		spin_unlock(&fp->fi_lock);
 		if (f1)
-			fput(f1);
+			nfsd_file_put(f1);
 		if (f2)
-			fput(f2);
+			nfsd_file_put(f2);
 	}
 }
@@ -933,25 +935,25 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
 static void put_deleg_file(struct nfs4_file *fp)
 {
-	struct file *filp = NULL;
+	struct nfsd_file *nf = NULL;
 	spin_lock(&fp->fi_lock);
 	if (--fp->fi_delegees == 0)
-		swap(filp, fp->fi_deleg_file);
+		swap(nf, fp->fi_deleg_file);
 	spin_unlock(&fp->fi_lock);
-	if (filp)
-		fput(filp);
+	if (nf)
+		nfsd_file_put(nf);
 }
 static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
 {
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
-	struct file *filp = fp->fi_deleg_file;
+	struct nfsd_file *nf = fp->fi_deleg_file;
 	WARN_ON_ONCE(!fp->fi_delegees);
-	vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+	vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
 	put_deleg_file(fp);
 }
@@ -1289,11 +1291,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
 {
 	struct nfs4_ol_stateid *stp = openlockstateid(stid);
 	struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
-	struct file *file;
+	struct nfsd_file *nf;
-	file = find_any_file(stp->st_stid.sc_file);
-	if (file)
-		filp_close(file, (fl_owner_t)lo);
+	nf = find_any_file(stp->st_stid.sc_file);
+	if (nf) {
+		get_file(nf->nf_file);
+		filp_close(nf->nf_file, (fl_owner_t)lo);
+		nfsd_file_put(nf);
+	}
 	nfs4_free_ol_stateid(stid);
 }
@@ -1563,21 +1568,39 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
  * re-negotiate active sessions and reduce their slot usage to make
  * room for new connections. For now we just fail the create session.
  */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
 {
 	u32 slotsize = slot_bytes(ca);
 	u32 num = ca->maxreqs;
 	unsigned long avail, total_avail;
+	unsigned int scale_factor;
 	spin_lock(&nfsd_drc_lock);
-	total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
+		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+	else
+		/* We have handed out more space than we chose in
+		 * set_max_drc() to allow.  That isn't really a
+		 * problem as long as that doesn't make us think we
+		 * have lots more due to integer overflow.
+		 */
+		total_avail = 0;
 	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
	/*
-	 * Never use more than a third of the remaining memory,
-	 * unless it's the only way to give this client a slot:
+	 * Never use more than a fraction of the remaining memory,
+	 * unless it's the only way to give this client a slot.
+	 * The chosen fraction is either 1/8 or 1/number of threads,
+	 * whichever is smaller.  This ensures there are adequate
+	 * slots to support multiple clients per thread.
+	 * Give the client one slot even if that would require
+	 * over-allocation--it is better than failure.
	 */
-	avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
+	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+	avail = clamp_t(unsigned long, avail, slotsize,
+			total_avail/scale_factor);
 	num = min_t(int, num, avail / slotsize);
+	num = max_t(int, num, 1);
 	nfsd_drc_mem_used += num * slotsize;
 	spin_unlock(&nfsd_drc_lock);
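In isolation, the slot accounting in the hunk above works out as follows. This is a kernel-style sketch that only mirrors the arithmetic, not the real function; the helper name and example numbers are made up, and NFSD_MAX_MEM_PER_SESSION stands in for the real per-session ceiling:

    /* How many DRC slots to grant one new session. */
    static unsigned int drc_slots(unsigned long total_avail, unsigned long slotsize,
                                  unsigned int requested, unsigned int nrthreads)
    {
            unsigned long avail = min_t(unsigned long, total_avail,
                                        NFSD_MAX_MEM_PER_SESSION);
            unsigned int scale = max_t(unsigned int, 8, nrthreads);
            unsigned int num;

            /* clamp(avail, slotsize, total_avail / scale) */
            if (avail < slotsize)
                    avail = slotsize;
            if (avail > total_avail / scale)
                    avail = total_avail / scale;

            num = min_t(unsigned int, requested, avail / slotsize);
            return max_t(unsigned int, num, 1); /* never refuse a session outright */
    }

Ignoring the per-session ceiling for simplicity: with 1 MB of DRC memory left and 8 or fewer threads, one session may claim at most 1 MB / 8 = 128 KB, so with 16 KB slots a client asking for 64 slots is granted 8, and a client that cannot even be given one slot within the cap still gets a single, deliberately over-allocated slot.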
@@ -2323,9 +2346,9 @@ static void states_stop(struct seq_file *s, void *v)
 	spin_unlock(&clp->cl_lock);
 }
-static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
 {
-	struct inode *inode = file_inode(f);
+	struct inode *inode = f->nf_inode;
 	seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
 		   MAJOR(inode->i_sb->s_dev),
@@ -2343,7 +2366,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 {
 	struct nfs4_ol_stateid *ols;
 	struct nfs4_file *nf;
-	struct file *file;
+	struct nfsd_file *file;
 	struct nfs4_stateowner *oo;
 	unsigned int access, deny;
@@ -2370,7 +2393,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 	seq_printf(s, ", ");
 	nfs4_show_owner(s, oo);
 	seq_printf(s, " }\n");
-	fput(file);
+	nfsd_file_put(file);
 	return 0;
 }
@@ -2379,7 +2402,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 {
 	struct nfs4_ol_stateid *ols;
 	struct nfs4_file *nf;
-	struct file *file;
+	struct nfsd_file *file;
 	struct nfs4_stateowner *oo;
 	ols = openlockstateid(st);
@@ -2401,7 +2424,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 	seq_printf(s, ", ");
 	nfs4_show_owner(s, oo);
 	seq_printf(s, " }\n");
-	fput(file);
+	nfsd_file_put(file);
 	return 0;
 }
@@ -2410,7 +2433,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 {
 	struct nfs4_delegation *ds;
 	struct nfs4_file *nf;
-	struct file *file;
+	struct nfsd_file *file;
 	ds = delegstateid(st);
 	nf = st->sc_file;
@@ -2433,7 +2456,7 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
 static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
 {
 	struct nfs4_layout_stateid *ls;
-	struct file *file;
+	struct nfsd_file *file;
 	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
 	file = ls->ls_file;
@@ -3169,10 +3192,10 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
	 * performance. When short on memory we therefore prefer to
	 * decrease number of slots instead of their size. Clients that
	 * request larger slots than they need will get poor results:
+	 * Note that we always allow at least one slot, because our
+	 * accounting is soft and provides no guarantees either way.
	 */
-	ca->maxreqs = nfsd4_get_drc_mem(ca);
-	if (!ca->maxreqs)
-		return nfserr_jukebox;
+	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 	return nfs_ok;
 }
@@ -4651,7 +4674,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
 		struct nfsd4_open *open)
 {
-	struct file *filp = NULL;
+	struct nfsd_file *nf = NULL;
 	__be32 status;
 	int oflag = nfs4_access_to_omode(open->op_share_access);
 	int access = nfs4_access_to_access(open->op_share_access);
@@ -4687,18 +4710,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 	if (!fp->fi_fds[oflag]) {
 		spin_unlock(&fp->fi_lock);
-		status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+		status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
 		if (status)
 			goto out_put_access;
 		spin_lock(&fp->fi_lock);
 		if (!fp->fi_fds[oflag]) {
-			fp->fi_fds[oflag] = filp;
-			filp = NULL;
+			fp->fi_fds[oflag] = nf;
+			nf = NULL;
 		}
 	}
 	spin_unlock(&fp->fi_lock);
-	if (filp)
-		fput(filp);
+	if (nf)
+		nfsd_file_put(nf);
 	status = nfsd4_truncate(rqstp, cur_fh, open);
 	if (status)
@@ -4767,7 +4790,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
 	fl->fl_end = OFFSET_MAX;
 	fl->fl_owner = (fl_owner_t)dp;
 	fl->fl_pid = current->tgid;
-	fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+	fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
 	return fl;
 }
@@ -4777,7 +4800,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 {
 	int status = 0;
 	struct nfs4_delegation *dp;
-	struct file *filp;
+	struct nfsd_file *nf;
 	struct file_lock *fl;
	/*
@@ -4788,8 +4811,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 	if (fp->fi_had_conflict)
 		return ERR_PTR(-EAGAIN);
-	filp = find_readable_file(fp);
-	if (!filp) {
+	nf = find_readable_file(fp);
+	if (!nf) {
 		/* We should always have a readable file here */
 		WARN_ON_ONCE(1);
 		return ERR_PTR(-EBADF);
@@ -4799,17 +4822,17 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 	if (nfs4_delegation_exists(clp, fp))
 		status = -EAGAIN;
 	else if (!fp->fi_deleg_file) {
-		fp->fi_deleg_file = filp;
+		fp->fi_deleg_file = nf;
 		/* increment early to prevent fi_deleg_file from being
 		 * cleared */
 		fp->fi_delegees = 1;
-		filp = NULL;
+		nf = NULL;
 	} else
 		fp->fi_delegees++;
 	spin_unlock(&fp->fi_lock);
 	spin_unlock(&state_lock);
-	if (filp)
-		fput(filp);
+	if (nf)
+		nfsd_file_put(nf);
 	if (status)
 		return ERR_PTR(status);
@@ -4822,7 +4845,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 	if (!fl)
 		goto out_clnt_odstate;
-	status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+	status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
 	if (fl)
 		locks_free_lock(fl);
 	if (status)
@@ -4842,7 +4865,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 	return dp;
 out_unlock:
-	vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+	vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
 out_clnt_odstate:
 	put_clnt_odstate(dp->dl_clnt_odstate);
 	nfs4_put_stid(&dp->dl_stid);
@@ -5513,7 +5536,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	return nfs_ok;
 }
-static struct file *
+static struct nfsd_file *
 nfs4_find_file(struct nfs4_stid *s, int flags)
 {
 	if (!s)
@@ -5523,7 +5546,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
 	case NFS4_DELEG_STID:
 		if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
 			return NULL;
-		return get_file(s->sc_file->fi_deleg_file);
+		return nfsd_file_get(s->sc_file->fi_deleg_file);
 	case NFS4_OPEN_STID:
 	case NFS4_LOCK_STID:
 		if (flags & RD_STATE)
@@ -5549,32 +5572,28 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
 static __be32
 nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
-		struct file **filpp, bool *tmp_file, int flags)
+		struct nfsd_file **nfp, int flags)
 {
 	int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
-	struct file *file;
+	struct nfsd_file *nf;
 	__be32 status;
-	file = nfs4_find_file(s, flags);
-	if (file) {
+	nf = nfs4_find_file(s, flags);
+	if (nf) {
 		status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
 				acc | NFSD_MAY_OWNER_OVERRIDE);
 		if (status) {
-			fput(file);
-			return status;
+			nfsd_file_put(nf);
+			goto out;
 		}
-		*filpp = file;
 	} else {
-		status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+		status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
 		if (status)
 			return status;
-		if (tmp_file)
-			*tmp_file = true;
 	}
+	*nfp = nf;
-	return 0;
+out:
+	return status;
 }
 /*
@@ -5583,7 +5602,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-		stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+		stateid_t *stateid, int flags, struct nfsd_file **nfp)
 {
 	struct inode *ino = d_inode(fhp->fh_dentry);
 	struct net *net = SVC_NET(rqstp);
@@ -5591,10 +5610,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 	struct nfs4_stid *s = NULL;
 	__be32 status;
-	if (filpp)
-		*filpp = NULL;
-	if (tmp_file)
-		*tmp_file = false;
+	if (nfp)
+		*nfp = NULL;
 	if (grace_disallows_io(net, ino))
 		return nfserr_grace;
@@ -5631,8 +5648,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 	status = nfs4_check_fh(fhp, s);
 done:
-	if (!status && filpp)
-		status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+	if (status == nfs_ok && nfp)
+		status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
 out:
 	if (s)
 		nfs4_put_stid(s);
@@ -6392,7 +6409,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfs4_ol_stateid *lock_stp = NULL;
 	struct nfs4_ol_stateid *open_stp = NULL;
 	struct nfs4_file *fp;
-	struct file *filp = NULL;
+	struct nfsd_file *nf = NULL;
 	struct nfsd4_blocked_lock *nbl = NULL;
 	struct file_lock *file_lock = NULL;
 	struct file_lock *conflock = NULL;
@@ -6474,8 +6491,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		/* Fallthrough */
 	case NFS4_READ_LT:
 		spin_lock(&fp->fi_lock);
-		filp = find_readable_file_locked(fp);
-		if (filp)
+		nf = find_readable_file_locked(fp);
+		if (nf)
 			get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
 		spin_unlock(&fp->fi_lock);
 		fl_type = F_RDLCK;
@@ -6486,8 +6503,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		/* Fallthrough */
 	case NFS4_WRITE_LT:
 		spin_lock(&fp->fi_lock);
-		filp = find_writeable_file_locked(fp);
-		if (filp)
+		nf = find_writeable_file_locked(fp);
+		if (nf)
 			get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
 		spin_unlock(&fp->fi_lock);
 		fl_type = F_WRLCK;
@@ -6497,7 +6514,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
-	if (!filp) {
+	if (!nf) {
 		status = nfserr_openmode;
 		goto out;
 	}
@@ -6513,7 +6530,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	file_lock->fl_type = fl_type;
 	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
 	file_lock->fl_pid = current->tgid;
-	file_lock->fl_file = filp;
+	file_lock->fl_file = nf->nf_file;
 	file_lock->fl_flags = fl_flags;
 	file_lock->fl_lmops = &nfsd_posix_mng_ops;
 	file_lock->fl_start = lock->lk_offset;
@@ -6535,7 +6552,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		spin_unlock(&nn->blocked_locks_lock);
 	}
-	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+	err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
 	switch (err) {
 	case 0: /* success! */
 		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6570,8 +6587,8 @@ out:
 		}
 		free_blocked_lock(nbl);
 	}
-	if (filp)
-		fput(filp);
+	if (nf)
+		nfsd_file_put(nf);
 	if (lock_stp) {
 		/* Bump seqid manually if the 4.0 replay owner is openowner */
 		if (cstate->replay_owner &&
@@ -6606,11 +6623,11 @@ out:
  */
 static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
-	struct file *file;
-	__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+	struct nfsd_file *nf;
+	__be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 	if (!err) {
-		err = nfserrno(vfs_test_lock(file, lock));
-		fput(file);
+		err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+		nfsd_file_put(nf);
 	}
 	return err;
 }
@@ -6698,7 +6715,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfsd4_locku *locku = &u->locku;
 	struct nfs4_ol_stateid *stp;
-	struct file *filp = NULL;
+	struct nfsd_file *nf = NULL;
 	struct file_lock *file_lock = NULL;
 	__be32 status;
 	int err;
@@ -6716,8 +6733,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 					&stp, nn);
 	if (status)
 		goto out;
-	filp = find_any_file(stp->st_stid.sc_file);
-	if (!filp) {
+	nf = find_any_file(stp->st_stid.sc_file);
+	if (!nf) {
 		status = nfserr_lock_range;
 		goto put_stateid;
 	}
@@ -6725,13 +6742,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (!file_lock) {
 		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
 		status = nfserr_jukebox;
-		goto fput;
+		goto put_file;
 	}
 	file_lock->fl_type = F_UNLCK;
 	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
 	file_lock->fl_pid = current->tgid;
-	file_lock->fl_file = filp;
+	file_lock->fl_file = nf->nf_file;
 	file_lock->fl_flags = FL_POSIX;
 	file_lock->fl_lmops = &nfsd_posix_mng_ops;
 	file_lock->fl_start = locku->lu_offset;
@@ -6740,14 +6757,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 					locku->lu_length);
 	nfs4_transform_lock_offset(file_lock);
-	err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+	err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
 	if (err) {
 		dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
 		goto out_nfserr;
 	}
 	nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
-	fput(filp);
+put_file:
+	nfsd_file_put(nf);
 put_stateid:
 	mutex_unlock(&stp->st_mutex);
 	nfs4_put_stid(&stp->st_stid);
@@ -6759,7 +6776,7 @@ out:
 out_nfserr:
 	status = nfserrno(err);
-	goto fput;
+	goto put_file;
 }
 /*
@@ -6772,17 +6789,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 {
 	struct file_lock *fl;
 	int status = false;
-	struct file *filp = find_any_file(fp);
+	struct nfsd_file *nf = find_any_file(fp);
 	struct inode *inode;
 	struct file_lock_context *flctx;
-	if (!filp) {
+	if (!nf) {
 		/* Any valid lock stateid should have some sort of access */
 		WARN_ON_ONCE(1);
 		return status;
 	}
-	inode = locks_inode(filp);
+	inode = locks_inode(nf->nf_file);
 	flctx = inode->i_flctx;
 	if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6795,7 +6812,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 		}
 		spin_unlock(&flctx->flc_lock);
 	}
-	fput(filp);
+	nfsd_file_put(nf);
 	return status;
 }
@@ -6888,7 +6905,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
  * will be freed in nfs4_remove_reclaim_record in the normal case).
  */
 struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+		struct nfsd_net *nn)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp;
@@ -6901,6 +6919,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
 		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
 		crp->cr_name.data = name.data;
 		crp->cr_name.len = name.len;
+		crp->cr_princhash.data = princhash.data;
+		crp->cr_princhash.len = princhash.len;
 		crp->cr_clp = NULL;
 		nn->reclaim_str_hashtbl_size++;
 	}
@@ -6912,6 +6932,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
 {
 	list_del(&crp->cr_strhash);
 	kfree(crp->cr_name.data);
+	kfree(crp->cr_princhash.data);
 	kfree(crp);
 	nn->reclaim_str_hashtbl_size--;
 }


@@ -49,6 +49,7 @@
 #include "cache.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
@@ -203,6 +204,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 	return p;
 }
static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
{
unsigned int this = (char *)argp->end - (char *)argp->p;
return this + argp->pagelen;
}
 static int zero_clientid(clientid_t *clid)
 {
 	return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +219,10 @@ static int zero_clientid(clientid_t *clid)
 /**
  * svcxdr_tmpalloc - allocate memory to be freed after compound processing
  * @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
  *
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
  */
 static void *
 svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -347,7 +355,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 		READ_BUF(4); len += 4;
 		nace = be32_to_cpup(p++);
-		if (nace > NFS4_ACL_MAX)
+		if (nace > compoundargs_bytes_left(argp)/20)
+			/*
+			 * Even with 4-byte names there wouldn't be
+			 * space for that many aces; something fishy is
+			 * going on:
+			 */
 			return nfserr_fbig;
 		*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
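The divisor in the new bound comes from the smallest possible on-the-wire ACE: three 4-byte words for type, flag and access mask, a 4-byte length word for the who string, and at least one 4-byte padded chunk of the string itself, i.e. 20 bytes. A hypothetical standalone version of the same sanity check (names invented here for illustration):

    /* Smallest XDR-encoded nfsace4: 3 * 4 bytes of fixed fields
     * + 4-byte who length + 4 bytes of padded who data. */
    #define MIN_XDR_ACE_BYTES 20u

    /* True if nace ACEs could plausibly fit in the bytes still unread. */
    static bool ace_count_plausible(u32 nace, u32 bytes_left)
    {
            return nace <= bytes_left / MIN_XDR_ACE_BYTES;
    }

Anything larger is rejected with nfserr_fbig before nfs4_acl_bytes() is asked to size an allocation for it, which is what removes the old fixed NFS4_ACL_MAX ceiling.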
@@ -1418,7 +1431,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 			    struct nfsd4_create_session *sess)
 {
 	DECODE_HEAD;
-	u32 dummy;
 	READ_BUF(16);
 	COPYMEM(&sess->clientid, 8);
@@ -1427,7 +1439,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 	/* Fore channel attrs */
 	READ_BUF(28);
-	dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+	p++; /* headerpadsz is always 0 */
 	sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
 	sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
 	sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1444,7 +1456,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 	/* Back channel attrs */
 	READ_BUF(28);
-	dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+	p++; /* headerpadsz is always 0 */
 	sess->back_channel.maxreq_sz = be32_to_cpup(p++);
 	sess->back_channel.maxresp_sz = be32_to_cpup(p++);
 	sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1736,7 +1748,6 @@ static __be32
 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 {
 	DECODE_HEAD;
-	unsigned int tmp;
 	status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
 	if (status)
@@ -1751,7 +1762,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 	p = xdr_decode_hyper(p, &copy->cp_count);
 	p++; /* ca_consecutive: we always do consecutive copies */
 	copy->cp_synchronous = be32_to_cpup(p++);
-	tmp = be32_to_cpup(p); /* Source server list not supported */
+	/* tmp = be32_to_cpup(p); Source server list not supported */
 	DECODE_TAIL;
 }
@@ -3217,9 +3228,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 	if (!p)
 		return nfserr_resource;
 	encode_cinfo(p, &create->cr_cinfo);
-	nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
-			create->cr_bmval[1], create->cr_bmval[2]);
-	return 0;
+	return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+			create->cr_bmval[1], create->cr_bmval[2]);
 }
 static __be32
@@ -3462,7 +3472,7 @@ static __be32 nfsd4_encode_splice_read(
 	len = maxcount;
 	nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
-				  file, read->rd_offset, &maxcount);
+				  file, read->rd_offset, &maxcount, &eof);
 	read->rd_length = maxcount;
 	if (nfserr) {
 		/*
@@ -3474,9 +3484,6 @@ static __be32 nfsd4_encode_splice_read(
 		return nfserr;
 	}
-	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-				d_inode(read->rd_fhp->fh_dentry)->i_size);
 	*(p++) = htonl(eof);
 	*(p++) = htonl(maxcount);
@@ -3547,15 +3554,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	len = maxcount;
 	nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
-			    resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+			    resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+			    &eof);
 	read->rd_length = maxcount;
 	if (nfserr)
 		return nfserr;
 	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
-	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-				d_inode(read->rd_fhp->fh_dentry)->i_size);
 	tmp = htonl(eof);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
 	tmp = htonl(maxcount);
@@ -3574,11 +3579,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
 	unsigned long maxcount;
 	struct xdr_stream *xdr = &resp->xdr;
-	struct file *file = read->rd_filp;
+	struct file *file;
 	int starting_len = xdr->buf->len;
-	struct raparms *ra = NULL;
 	__be32 *p;
+	if (nfserr)
+		return nfserr;
+	file = read->rd_nf->nf_file;
 	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
 	if (!p) {
 		WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3604,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 				 (xdr->buf->buflen - xdr->buf->len));
 	maxcount = min_t(unsigned long, maxcount, read->rd_length);
-	if (read->rd_tmp_file)
-		ra = nfsd_init_raparms(file);
 	if (file->f_op->splice_read &&
	    test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
 		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
 	else
 		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
-	if (ra)
-		nfsd_put_raparams(file, ra);
 	if (nfserr)
 		xdr_truncate_encode(xdr, starting_len);


@@ -1476,6 +1476,7 @@ static __net_init int nfsd_init_net(struct net *net)
     atomic_set(&nn->ntf_refcnt, 0);
     init_waitqueue_head(&nn->ntf_wq);
+    seqlock_init(&nn->boot_lock);
 
     mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
     if (IS_ERR(mnt)) {


@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
     struct nfsd_readargs *argp = rqstp->rq_argp;
     struct nfsd_readres *resp = rqstp->rq_resp;
     __be32 nfserr;
+    u32 eof;
 
     dprintk("nfsd: READ %s %d bytes at %d\n",
         SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
     nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
                   argp->offset,
                   rqstp->rq_vec, argp->vlen,
-                  &resp->count);
+                  &resp->count,
+                  &eof);
     if (nfserr) return nfserr;
     return fh_getattr(&resp->fh, &resp->stat);


@@ -27,6 +27,7 @@
 #include "cache.h"
 #include "vfs.h"
 #include "netns.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY NFSDDBG_SVC
@@ -313,22 +314,17 @@ static int nfsd_startup_generic(int nrservs)
     if (nfsd_users++)
         return 0;
 
-    /*
-     * Readahead param cache - will no-op if it already exists.
-     * (Note therefore results will be suboptimal if number of
-     * threads is modified after nfsd start.)
-     */
-    ret = nfsd_racache_init(2*nrservs);
+    ret = nfsd_file_cache_init();
     if (ret)
         goto dec_users;
 
     ret = nfs4_state_start();
     if (ret)
-        goto out_racache;
+        goto out_file_cache;
     return 0;
 
-out_racache:
-    nfsd_racache_shutdown();
+out_file_cache:
+    nfsd_file_cache_shutdown();
 dec_users:
     nfsd_users--;
     return ret;
@@ -340,7 +336,7 @@ static void nfsd_shutdown_generic(void)
         return;
 
     nfs4_state_shutdown();
-    nfsd_racache_shutdown();
+    nfsd_file_cache_shutdown();
 }
 
 static bool nfsd_needs_lockd(struct nfsd_net *nn)
@@ -348,6 +344,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
     return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
 }
 
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+    int seq = 0;
+
+    do {
+        read_seqbegin_or_lock(&nn->boot_lock, &seq);
+        /*
+         * This is opaque to client, so no need to byte-swap. Use
+         * __force to keep sparse happy. y2038 time_t overflow is
+         * irrelevant in this usage
+         */
+        verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+        verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+    } while (need_seqretry(&nn->boot_lock, seq));
+    done_seqretry(&nn->boot_lock, seq);
+}
+
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+{
+    ktime_get_real_ts64(&nn->nfssvc_boot);
+}
+
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+{
+    write_seqlock(&nn->boot_lock);
+    nfsd_reset_boot_verifier_locked(nn);
+    write_sequnlock(&nn->boot_lock);
+}
+
 static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
 {
     struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -391,6 +416,7 @@ static void nfsd_shutdown_net(struct net *net)
 {
     struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
+    nfsd_file_cache_purge(net);
     nfs4_state_shutdown_net(net);
     if (nn->lockd_up) {
         lockd_down(net);
@@ -599,7 +625,7 @@ int nfsd_create_serv(struct net *net)
 #endif
     }
     atomic_inc(&nn->ntf_refcnt);
-    ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+    nfsd_reset_boot_verifier(nn);
     return 0;
 }
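
Note: nfsd_copy_boot_verifier() above is a lockless read of the two-word verifier; readers retry if nn->nfssvc_boot was rewritten while they were copying it. A rough userspace analogue of that retry loop, using a hand-rolled sequence counter built on C11 atomics. This only sketches the retry protocol; the kernel's read_seqbegin_or_lock() can additionally fall back to taking the lock, and real concurrent code would also need atomic accesses or fences on the data words themselves.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic unsigned int seq;   /* even = stable, odd = write in progress */
    static uint32_t boot_verf[2];      /* data guarded by the sequence counter */

    static void write_verifier(uint32_t a, uint32_t b)
    {
            atomic_fetch_add(&seq, 1);          /* now odd: readers will retry */
            boot_verf[0] = a;
            boot_verf[1] = b;
            atomic_fetch_add(&seq, 1);          /* even again */
    }

    static void copy_verifier(uint32_t out[2])
    {
            unsigned int s;
            do {
                    while ((s = atomic_load(&seq)) & 1)
                            ;                   /* writer active, spin */
                    out[0] = boot_verf[0];
                    out[1] = boot_verf[1];
            } while (atomic_load(&seq) != s);   /* retry if a write slipped in */
    }

    int main(void)
    {
            uint32_t v[2];
            write_verifier(0x5d8ea057, 12345);
            copy_verifier(v);
            printf("%08x.%u\n", v[0], v[1]);
            return 0;
    }
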


@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
struct list_head cr_strhash; /* hash by cr_name */ struct list_head cr_strhash; /* hash by cr_name */
struct nfs4_client *cr_clp; /* pointer to associated clp */ struct nfs4_client *cr_clp; /* pointer to associated clp */
struct xdr_netobj cr_name; /* recovery dir name */ struct xdr_netobj cr_name; /* recovery dir name */
struct xdr_netobj cr_princhash;
}; };
/* A reasonable value for REPLAY_ISIZE was estimated as follows: /* A reasonable value for REPLAY_ISIZE was estimated as follows:
@ -506,7 +507,7 @@ struct nfs4_file {
}; };
struct list_head fi_clnt_odstate; struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */ /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
struct file * fi_fds[3]; struct nfsd_file *fi_fds[3];
/* /*
* Each open or lock stateid contributes 0-4 to the counts * Each open or lock stateid contributes 0-4 to the counts
* below depending on which bits are set in st_access_bitmap: * below depending on which bits are set in st_access_bitmap:
@ -516,7 +517,7 @@ struct nfs4_file {
*/ */
atomic_t fi_access[2]; atomic_t fi_access[2];
u32 fi_share_deny; u32 fi_share_deny;
struct file *fi_deleg_file; struct nfsd_file *fi_deleg_file;
int fi_delegees; int fi_delegees;
struct knfsd_fh fi_fhandle; struct knfsd_fh fi_fhandle;
bool fi_had_conflict; bool fi_had_conflict;
@ -565,7 +566,7 @@ struct nfs4_layout_stateid {
spinlock_t ls_lock; spinlock_t ls_lock;
struct list_head ls_layouts; struct list_head ls_layouts;
u32 ls_layout_type; u32 ls_layout_type;
struct file *ls_file; struct nfsd_file *ls_file;
struct nfsd4_callback ls_recall; struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid; stateid_t ls_recall_sid;
bool ls_recalled; bool ls_recalled;
@ -616,7 +617,7 @@ struct nfsd4_copy;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
stateid_t *stateid, int flags, struct file **filp, bool *tmp_file); stateid_t *stateid, int flags, struct nfsd_file **filp);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask, stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid **s, struct nfsd_net *nn);
@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp); extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct nfsd_net *nn); struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
struct nfs4_file *find_file(struct knfsd_fh *fh); struct nfs4_file *find_file(struct knfsd_fh *fh);
@ -657,7 +658,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
{ {
refcount_inc(&fi->fi_ref); refcount_inc(&fi->fi_ref);
} }
struct file *find_any_file(struct nfs4_file *f); struct nfsd_file *find_any_file(struct nfs4_file *f);
/* grace period management */ /* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn); void nfsd4_end_grace(struct nfsd_net *nn);


@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
DEFINE_NFSD_ERR_EVENT(write_err); DEFINE_NFSD_ERR_EVENT(write_err);
#include "state.h" #include "state.h"
#include "filecache.h"
#include "vfs.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class, DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp), TP_PROTO(stateid_t *stp),
@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(layout_recall_release);
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
{ 1 << NFSD_FILE_PENDING, "PENDING" }, \
{ 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
{ 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
/* FIXME: This should probably be fleshed out in the future. */
#define show_nf_may(val) \
__print_flags(val, "|", \
{ NFSD_MAY_READ, "READ" }, \
{ NFSD_MAY_WRITE, "WRITE" }, \
{ NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
DECLARE_EVENT_CLASS(nfsd_file_class,
TP_PROTO(struct nfsd_file *nf),
TP_ARGS(nf),
TP_STRUCT__entry(
__field(unsigned int, nf_hashval)
__field(void *, nf_inode)
__field(int, nf_ref)
__field(unsigned long, nf_flags)
__field(unsigned char, nf_may)
__field(struct file *, nf_file)
),
TP_fast_assign(
__entry->nf_hashval = nf->nf_hashval;
__entry->nf_inode = nf->nf_inode;
__entry->nf_ref = atomic_read(&nf->nf_ref);
__entry->nf_flags = nf->nf_flags;
__entry->nf_may = nf->nf_may;
__entry->nf_file = nf->nf_file;
),
TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
__entry->nf_hashval,
__entry->nf_inode,
__entry->nf_ref,
show_nf_flags(__entry->nf_flags),
show_nf_may(__entry->nf_may),
__entry->nf_file)
)
#define DEFINE_NFSD_FILE_EVENT(name) \
DEFINE_EVENT(nfsd_file_class, name, \
TP_PROTO(struct nfsd_file *nf), \
TP_ARGS(nf))
DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
TRACE_EVENT(nfsd_file_acquire,
TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
struct inode *inode, unsigned int may_flags,
struct nfsd_file *nf, __be32 status),
TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
TP_STRUCT__entry(
__field(__be32, xid)
__field(unsigned int, hash)
__field(void *, inode)
__field(unsigned int, may_flags)
__field(int, nf_ref)
__field(unsigned long, nf_flags)
__field(unsigned char, nf_may)
__field(struct file *, nf_file)
__field(__be32, status)
),
TP_fast_assign(
__entry->xid = rqstp->rq_xid;
__entry->hash = hash;
__entry->inode = inode;
__entry->may_flags = may_flags;
__entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
__entry->nf_flags = nf ? nf->nf_flags : 0;
__entry->nf_may = nf ? nf->nf_may : 0;
__entry->nf_file = nf ? nf->nf_file : NULL;
__entry->status = status;
),
TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
show_nf_may(__entry->may_flags), __entry->nf_ref,
show_nf_flags(__entry->nf_flags),
show_nf_may(__entry->nf_may), __entry->nf_file,
be32_to_cpu(__entry->status))
);
DECLARE_EVENT_CLASS(nfsd_file_search_class,
TP_PROTO(struct inode *inode, unsigned int hash, int found),
TP_ARGS(inode, hash, found),
TP_STRUCT__entry(
__field(struct inode *, inode)
__field(unsigned int, hash)
__field(int, found)
),
TP_fast_assign(
__entry->inode = inode;
__entry->hash = hash;
__entry->found = found;
),
TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
__entry->inode, __entry->found)
);
#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
DEFINE_EVENT(nfsd_file_search_class, name, \
TP_PROTO(struct inode *inode, unsigned int hash, int found), \
TP_ARGS(inode, hash, found))
DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
TRACE_EVENT(nfsd_file_fsnotify_handle_event,
TP_PROTO(struct inode *inode, u32 mask),
TP_ARGS(inode, mask),
TP_STRUCT__entry(
__field(struct inode *, inode)
__field(unsigned int, nlink)
__field(umode_t, mode)
__field(u32, mask)
),
TP_fast_assign(
__entry->inode = inode;
__entry->nlink = inode->i_nlink;
__entry->mode = inode->i_mode;
__entry->mask = mask;
),
TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
__entry->nlink, __entry->mode, __entry->mask)
);
#endif /* _NFSD_TRACE_H */ #endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
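
Note: the show_nf_flags() helper above is just __print_flags() turning a bitmask into an "A|B|C" string in the trace output. A small userspace equivalent of that decoding, using the same flag names; the bit numbers here are assumptions for illustration, since the real values come from the NFSD_FILE_* constants in filecache.h:

    #include <stdio.h>

    static const char *nf_flag_names[] = {
            "HASHED", "PENDING", "BREAK_READ", "BREAK_WRITE", "REFERENCED",
    };

    /* Print a bitmask as e.g. "HASHED|REFERENCED", or "0" if no bits are set. */
    static void show_nf_flags(unsigned long val)
    {
            int printed = 0;
            for (unsigned i = 0; i < sizeof(nf_flag_names)/sizeof(nf_flag_names[0]); i++) {
                    if (val & (1UL << i)) {
                            printf("%s%s", printed ? "|" : "", nf_flag_names[i]);
                            printed = 1;
                    }
            }
            if (!printed)
                    printf("0");
            printf("\n");
    }

    int main(void)
    {
            show_nf_flags((1UL << 0) | (1UL << 4));   /* HASHED|REFERENCED */
            return 0;
    }
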


@ -44,38 +44,11 @@
#include "nfsd.h" #include "nfsd.h"
#include "vfs.h" #include "vfs.h"
#include "filecache.h"
#include "trace.h" #include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP #define NFSDDBG_FACILITY NFSDDBG_FILEOP
/*
* This is a cache of readahead params that help us choose the proper
* readahead strategy. Initially, we set all readahead parameters to 0
* and let the VFS handle things.
* If you increase the number of cached files very much, you'll need to
* add a hash table here.
*/
struct raparms {
struct raparms *p_next;
unsigned int p_count;
ino_t p_ino;
dev_t p_dev;
int p_set;
struct file_ra_state p_ra;
unsigned int p_hindex;
};
struct raparm_hbucket {
struct raparms *pb_head;
spinlock_t pb_lock;
} ____cacheline_aligned_in_smp;
#define RAPARM_HASH_BITS 4
#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
/* /*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed * Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point. * a mount point.
@@ -699,7 +672,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
 }
 #endif /* CONFIG_NFSD_V3 */
 
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
 {
     unsigned int mode;
 
@@ -715,8 +688,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
  * and additional flags.
  * N.B. After this call fhp needs an fh_put
  */
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
         int may_flags, struct file **filp)
 {
     struct path path;
@@ -726,25 +699,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
     __be32 err;
     int host_err = 0;
 
-    validate_process_creds();
-
-    /*
-     * If we get here, then the client has already done an "open",
-     * and (hopefully) checked permission - so allow OWNER_OVERRIDE
-     * in case a chmod has now revoked permission.
-     *
-     * Arguably we should also allow the owner override for
-     * directories, but we never have and it doesn't seem to have
-     * caused anyone a problem. If we were to change this, note
-     * also that our filldir callbacks would need a variant of
-     * lookup_one_len that doesn't check permissions.
-     */
-    if (type == S_IFREG)
-        may_flags |= NFSD_MAY_OWNER_OVERRIDE;
-    err = fh_verify(rqstp, fhp, type, may_flags);
-    if (err)
-        goto out;
-
     path.mnt = fhp->fh_export->ex_path.mnt;
     path.dentry = fhp->fh_dentry;
     inode = d_inode(path.dentry);
@@ -798,67 +752,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 out_nfserr:
     err = nfserrno(host_err);
 out:
+    return err;
+}
+
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+        int may_flags, struct file **filp)
+{
+    __be32 err;
+
+    validate_process_creds();
+    /*
+     * If we get here, then the client has already done an "open",
+     * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+     * in case a chmod has now revoked permission.
+     *
+     * Arguably we should also allow the owner override for
+     * directories, but we never have and it doesn't seem to have
+     * caused anyone a problem. If we were to change this, note
+     * also that our filldir callbacks would need a variant of
+     * lookup_one_len that doesn't check permissions.
+     */
+    if (type == S_IFREG)
+        may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+    err = fh_verify(rqstp, fhp, type, may_flags);
+    if (!err)
+        err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
     validate_process_creds();
     return err;
 }
 
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+        int may_flags, struct file **filp)
 {
-    struct inode *inode = file_inode(file);
-    dev_t dev = inode->i_sb->s_dev;
-    ino_t ino = inode->i_ino;
-    struct raparms *ra, **rap, **frap = NULL;
-    int depth = 0;
-    unsigned int hash;
-    struct raparm_hbucket *rab;
+    __be32 err;
 
-    hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
-    rab = &raparm_hash[hash];
-
-    spin_lock(&rab->pb_lock);
-    for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
-        if (ra->p_ino == ino && ra->p_dev == dev)
-            goto found;
-        depth++;
-        if (ra->p_count == 0)
-            frap = rap;
-    }
-    depth = nfsdstats.ra_size;
-    if (!frap) {
-        spin_unlock(&rab->pb_lock);
-        return NULL;
-    }
-    rap = frap;
-    ra = *frap;
-    ra->p_dev = dev;
-    ra->p_ino = ino;
-    ra->p_set = 0;
-    ra->p_hindex = hash;
-found:
-    if (rap != &rab->pb_head) {
-        *rap = ra->p_next;
-        ra->p_next = rab->pb_head;
-        rab->pb_head = ra;
-    }
-    ra->p_count++;
-    nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-    spin_unlock(&rab->pb_lock);
-
-    if (ra->p_set)
-        file->f_ra = ra->p_ra;
-    return ra;
-}
-
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
-{
-    struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-
-    spin_lock(&rab->pb_lock);
-    ra->p_ra = file->f_ra;
-    ra->p_set = 1;
-    ra->p_count--;
-    spin_unlock(&rab->pb_lock);
+    validate_process_creds();
+    err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+    validate_process_creds();
+    return err;
 }
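
Note: the refactor above splits the old nfsd_open() into a bare __nfsd_open() (no permission re-check) plus two wrappers. nfsd_open() keeps the fh_verify()/OWNER_OVERRIDE step for callers coming straight off the wire, while nfsd_open_verified() is used by the new file cache, whose callers have already verified the file handle. A loose userspace analogy of that "check once, open many" factoring follows; the helper names are hypothetical and this illustrates only the call structure, not NFS semantics:

    #include <fcntl.h>
    #include <unistd.h>

    /* Core open step, shared by both wrappers. */
    static int do_open(const char *path)
    {
            return open(path, O_RDONLY);
    }

    /* Wrapper that performs its own access check first. */
    static int open_checked(const char *path)
    {
            if (access(path, R_OK) != 0)
                    return -1;
            return do_open(path);
    }

    /* Wrapper for callers that have already checked permissions. */
    static int open_preverified(const char *path)
    {
            return do_open(path);
    }

    int main(void)
    {
            int fd = open_checked("/etc/hostname");
            if (fd < 0)
                    fd = open_preverified("/etc/hostname");
            if (fd >= 0)
                    close(fd);
            return 0;
    }
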
/* /*
@ -901,12 +834,23 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor); return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
} }
static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
size_t expected)
{
if (expected != 0 && len == 0)
return 1;
if (offset+len >= i_size_read(file_inode(file)))
return 1;
return 0;
}
 static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                   struct file *file, loff_t offset,
-                  unsigned long *count, int host_err)
+                  unsigned long *count, u32 *eof, ssize_t host_err)
 {
     if (host_err >= 0) {
         nfsdstats.io_read += host_err;
+        *eof = nfsd_eof_on_read(file, offset, host_err, *count);
         *count = host_err;
         fsnotify_access(file);
         trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -918,7 +862,8 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 
 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-            struct file *file, loff_t offset, unsigned long *count)
+            struct file *file, loff_t offset, unsigned long *count,
+            u32 *eof)
 {
     struct splice_desc sd = {
         .len        = 0,
@@ -926,25 +871,27 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
         .pos        = offset,
         .u.data     = rqstp,
     };
-    int host_err;
+    ssize_t host_err;
 
     trace_nfsd_read_splice(rqstp, fhp, offset, *count);
     rqstp->rq_next_page = rqstp->rq_respages + 1;
     host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-    return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+    return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
 __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
             struct file *file, loff_t offset,
-            struct kvec *vec, int vlen, unsigned long *count)
+            struct kvec *vec, int vlen, unsigned long *count,
+            u32 *eof)
 {
     struct iov_iter iter;
-    int host_err;
+    loff_t ppos = offset;
+    ssize_t host_err;
 
     trace_nfsd_read_vector(rqstp, fhp, offset, *count);
     iov_iter_kvec(&iter, READ, vec, vlen, *count);
-    host_err = vfs_iter_read(file, &iter, &offset, 0);
-    return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+    host_err = vfs_iter_read(file, &iter, &ppos, 0);
+    return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
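
Note: the new nfsd_eof_on_read() above replaces the old inline from vfs.h. A read hits EOF either when the caller asked for data (expected != 0) and got none back, or when offset plus the bytes returned has reached the current file size. The same predicate as a stand-alone function with a couple of spot checks; plain C, with the file size passed in explicitly since there is no inode here:

    #include <assert.h>
    #include <stdio.h>

    /* len bytes were returned for a request of 'expected' bytes at 'offset'
     * in a file that is currently 'size' bytes long. */
    static unsigned int eof_on_read(long long offset, long long len,
                                    unsigned long expected, long long size)
    {
            if (expected != 0 && len == 0)
                    return 1;
            if (offset + len >= size)
                    return 1;
            return 0;
    }

    int main(void)
    {
            assert(eof_on_read(0, 0, 4096, 100) == 1);   /* asked for data, got none */
            assert(eof_on_read(0, 100, 4096, 100) == 1); /* read reached the size */
            assert(eof_on_read(0, 100, 100, 500) == 0);  /* more data remains */
            printf("eof checks ok\n");
            return 0;
    }
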
/* /*
@@ -1025,8 +972,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
     nfsdstats.io_write += *cnt;
     fsnotify_modify(file);
 
-    if (stable && use_wgather)
+    if (stable && use_wgather) {
         host_err = wait_for_concurrent_writes(file);
+        if (host_err < 0)
+            nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+                         nfsd_net_id));
+    }
 
 out_nfserr:
     if (host_err >= 0) {
@@ -1047,27 +998,25 @@ out_nfserr:
  * N.B. After this call fhp needs an fh_put
  */
 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-    loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+    loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+    u32 *eof)
 {
+    struct nfsd_file *nf;
     struct file *file;
-    struct raparms *ra;
     __be32 err;
 
     trace_nfsd_read_start(rqstp, fhp, offset, *count);
-    err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+    err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
     if (err)
         return err;
 
-    ra = nfsd_init_raparms(file);
-
+    file = nf->nf_file;
     if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
-        err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+        err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
     else
-        err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+        err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
 
-    if (ra)
-        nfsd_put_raparams(file, ra);
-    fput(file);
+    nfsd_file_put(nf);
 
     trace_nfsd_read_done(rqstp, fhp, offset, *count);
@@ -1083,17 +1032,18 @@ __be32
 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
        struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
-    struct file *file = NULL;
-    __be32 err = 0;
+    struct nfsd_file *nf;
+    __be32 err;
 
     trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
 
-    err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+    err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
     if (err)
         goto out;
 
-    err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
-    fput(file);
+    err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
+            vlen, cnt, stable);
+    nfsd_file_put(nf);
 out:
     trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
     return err;
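
Note: nfsd_read()/nfsd_write() above no longer open and close the file on every call; they take a reference on a cached nfsd_file and drop it with nfsd_file_put(), which can leave the struct file open for later requests. A toy userspace sketch of that acquire/put shape, with a single cached slot and a reference count; the names are hypothetical and this is far simpler than the real hashed, garbage-collected cache in filecache.c:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static struct { int fd; int refcount; } slot = { .fd = -1 };

    /* Assume a single well-known path for the sketch. */
    static int file_acquire(const char *path)
    {
            if (slot.fd < 0) {                  /* first use: really open */
                    slot.fd = open(path, O_RDONLY);
                    if (slot.fd < 0)
                            return -1;
            }
            slot.refcount++;
            return slot.fd;
    }

    static void file_put(void)
    {
            if (--slot.refcount > 0)
                    return;
            /* Refcount hit zero, but keep the fd open so the next acquire
             * is cheap; a real cache closes idle entries lazily later. */
    }

    int main(void)
    {
            char buf[64];
            int fd = file_acquire("/etc/hostname");
            if (fd >= 0) {
                    printf("read %zd bytes\n", pread(fd, buf, sizeof(buf), 0));
                    file_put();
            }
            return 0;
    }
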
@@ -1113,9 +1063,9 @@ __be32
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                loff_t offset, unsigned long count)
 {
-    struct file *file;
+    struct nfsd_file *nf;
     loff_t end = LLONG_MAX;
     __be32 err = nfserr_inval;
 
     if (offset < 0)
         goto out;
@@ -1125,20 +1075,27 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
         goto out;
     }
 
-    err = nfsd_open(rqstp, fhp, S_IFREG,
-            NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+    err = nfsd_file_acquire(rqstp, fhp,
+            NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
     if (err)
         goto out;
     if (EX_ISSYNC(fhp->fh_export)) {
-        int err2 = vfs_fsync_range(file, offset, end, 0);
+        int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
 
-        if (err2 != -EINVAL)
-            err = nfserrno(err2);
-        else
+        switch (err2) {
+        case 0:
+            break;
+        case -EINVAL:
             err = nfserr_notsupp;
+            break;
+        default:
+            err = nfserrno(err2);
+            nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+                         nfsd_net_id));
+        }
     }
-    fput(file);
+    nfsd_file_put(nf);
 out:
     return err;
 }
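
Note: the COMMIT path above now distinguishes three fsync outcomes: success, -EINVAL (mapped to nfserr_notsupp, as before), and any other error, which maps to an NFS error and also forces a new boot verifier so clients re-send their cached writes. The same decision table as a small stand-alone function; errno-based userspace sketch, with the NFS status values as illustrative placeholders:

    #include <errno.h>
    #include <stdio.h>

    enum nfs_status { NFS_OK, NFS_NOTSUPP, NFS_IO_ERR };

    /* Map an fsync()-style result (0 or -errno) to an NFS-ish status and
     * report whether the boot verifier should be reset. */
    static enum nfs_status map_commit_result(int err, int *reset_verifier)
    {
            *reset_verifier = 0;
            switch (err) {
            case 0:
                    return NFS_OK;
            case -EINVAL:
                    return NFS_NOTSUPP;
            default:
                    *reset_verifier = 1;   /* force clients to resend writes */
                    return NFS_IO_ERR;
            }
    }

    int main(void)
    {
            int reset;
            printf("status=%d reset=%d\n", map_commit_result(-EIO, &reset), reset);
            return 0;
    }
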
@ -1659,6 +1616,26 @@ out_nfserr:
goto out_unlock; goto out_unlock;
} }
static void
nfsd_close_cached_files(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
if (inode && S_ISREG(inode->i_mode))
nfsd_file_close_inode_sync(inode);
}
static bool
nfsd_has_cached_files(struct dentry *dentry)
{
bool ret = false;
struct inode *inode = d_inode(dentry);
if (inode && S_ISREG(inode->i_mode))
ret = nfsd_file_is_cached(inode);
return ret;
}
/* /*
* Rename a file * Rename a file
* N.B. After this call _both_ ffhp and tfhp need an fh_put * N.B. After this call _both_ ffhp and tfhp need an fh_put
@ -1671,6 +1648,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
struct inode *fdir, *tdir; struct inode *fdir, *tdir;
__be32 err; __be32 err;
int host_err; int host_err;
bool has_cached = false;
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err) if (err)
@ -1689,6 +1667,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out; goto out;
retry:
host_err = fh_want_write(ffhp); host_err = fh_want_write(ffhp);
if (host_err) { if (host_err) {
err = nfserrno(host_err); err = nfserrno(host_err);
@@ -1728,11 +1707,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
     if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
         goto out_dput_new;
 
-    host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
-    if (!host_err) {
-        host_err = commit_metadata(tfhp);
-        if (!host_err)
-            host_err = commit_metadata(ffhp);
+    if (nfsd_has_cached_files(ndentry)) {
+        has_cached = true;
+        goto out_dput_old;
+    } else {
+        host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+        if (!host_err) {
+            host_err = commit_metadata(tfhp);
+            if (!host_err)
+                host_err = commit_metadata(ffhp);
+        }
     }
 out_dput_new:
     dput(ndentry);
@@ -1745,12 +1729,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
      * as that would do the wrong thing if the two directories
      * were the same, so again we do it by hand.
      */
-    fill_post_wcc(ffhp);
-    fill_post_wcc(tfhp);
+    if (!has_cached) {
+        fill_post_wcc(ffhp);
+        fill_post_wcc(tfhp);
+    }
     unlock_rename(tdentry, fdentry);
     ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp); fh_drop_write(ffhp);
/*
* If the target dentry has cached open files, then we need to try to
* close them prior to doing the rename. Flushing delayed fput
* shouldn't be done with locks held however, so we delay it until this
* point and then reattempt the whole shebang.
*/
if (has_cached) {
has_cached = false;
nfsd_close_cached_files(ndentry);
dput(ndentry);
goto retry;
}
out: out:
return err; return err;
} }
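
Note: the rename path above follows a "detect under lock, clean up unlocked, retry" pattern: if the target has cached open files, it backs all the way out so the delayed close does not run with directory locks held, flushes the cached files, and jumps back to retry:. A compact sketch of that control-flow shape in isolation; generic C with stand-in lock and flush helpers, none of which is the actual nfsd locking:

    #include <stdbool.h>
    #include <stdio.h>

    static bool target_has_cached_files = true;   /* pretend state */

    static void lock_dirs(void)    { puts("lock"); }
    static void unlock_dirs(void)  { puts("unlock"); }
    static void do_rename(void)    { puts("rename"); }
    static void flush_cached_files(void)
    {
            puts("flush cached files (no locks held)");
            target_has_cached_files = false;
    }

    int main(void)
    {
            bool has_cached;
    retry:
            lock_dirs();
            has_cached = target_has_cached_files;
            if (!has_cached)
                    do_rename();
            unlock_dirs();

            if (has_cached) {
                    flush_cached_files();   /* the slow part, done unlocked */
                    goto retry;
            }
            return 0;
    }
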
@@ -1797,10 +1795,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
     if (!type)
         type = d_inode(rdentry)->i_mode & S_IFMT;
 
-    if (type != S_IFDIR)
+    if (type != S_IFDIR) {
+        nfsd_close_cached_files(rdentry);
         host_err = vfs_unlink(dirp, rdentry, NULL);
-    else
+    } else {
         host_err = vfs_rmdir(dirp, rdentry);
+    }
 
     if (!host_err)
         host_err = commit_metadata(fhp);
     dput(rdentry);
@ -2074,63 +2075,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return err? nfserrno(err) : 0; return err? nfserrno(err) : 0;
} }
void
nfsd_racache_shutdown(void)
{
struct raparms *raparm, *last_raparm;
unsigned int i;
dprintk("nfsd: freeing readahead buffers.\n");
for (i = 0; i < RAPARM_HASH_SIZE; i++) {
raparm = raparm_hash[i].pb_head;
while(raparm) {
last_raparm = raparm;
raparm = raparm->p_next;
kfree(last_raparm);
}
raparm_hash[i].pb_head = NULL;
}
}
/*
* Initialize readahead param cache
*/
int
nfsd_racache_init(int cache_size)
{
int i;
int j = 0;
int nperbucket;
struct raparms **raparm = NULL;
if (raparm_hash[0].pb_head)
return 0;
nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
nperbucket = max(2, nperbucket);
cache_size = nperbucket * RAPARM_HASH_SIZE;
dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
for (i = 0; i < RAPARM_HASH_SIZE; i++) {
spin_lock_init(&raparm_hash[i].pb_lock);
raparm = &raparm_hash[i].pb_head;
for (j = 0; j < nperbucket; j++) {
*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
if (!*raparm)
goto out_nomem;
raparm = &(*raparm)->p_next;
}
*raparm = NULL;
}
nfsdstats.ra_size = cache_size;
return 0;
out_nomem:
dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
nfsd_racache_shutdown();
return -ENOMEM;
}


@ -40,8 +40,6 @@
typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
/* nfsd/vfs.c */ /* nfsd/vfs.c */
int nfsd_racache_init(int);
void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp); struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -75,18 +73,23 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 __be32      nfsd_commit(struct svc_rqst *, struct svc_fh *,
                 loff_t, unsigned long);
 #endif /* CONFIG_NFSD_V3 */
+int         nfsd_open_break_lease(struct inode *, int);
 __be32      nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                 int, struct file **);
-struct raparms;
+__be32      nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+                int, struct file **);
 __be32      nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 struct file *file, loff_t offset,
-                unsigned long *count);
+                unsigned long *count,
+                u32 *eof);
 __be32      nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 struct file *file, loff_t offset,
                 struct kvec *vec, int vlen,
-                unsigned long *count);
+                unsigned long *count,
+                u32 *eof);
 __be32      nfsd_read(struct svc_rqst *, struct svc_fh *,
-                loff_t, struct kvec *, int, unsigned long *);
+                loff_t, struct kvec *, int, unsigned long *,
+                u32 *eof);
 __be32      nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
                 struct kvec *, int, unsigned long *, int);
 __be32      nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
@ -115,9 +118,6 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, __be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int); struct dentry *, int);
struct raparms *nfsd_init_raparms(struct file *file);
void nfsd_put_raparams(struct file *file, struct raparms *ra);
static inline int fh_want_write(struct svc_fh *fh) static inline int fh_want_write(struct svc_fh *fh)
{ {
int ret; int ret;
@ -152,23 +152,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1; || createmode == NFS4_CREATE_EXCLUSIVE4_1;
} }
static inline bool nfsd_eof_on_read(long requested, long read,
loff_t offset, loff_t size)
{
/* We assume a short read means eof: */
if (requested > read)
return true;
/*
* A non-short read might also reach end of file. The spec
* still requires us to set eof in that case.
*
* Further operations may have modified the file size since
* the read, so the following check is not atomic with the read.
* We've only seen that cause a problem for a client in the case
* where the read returned a count of 0 without setting eof.
* That case was fixed by the addition of the above check.
*/
return (offset + read >= size);
}
#endif /* LINUX_NFSD_VFS_H */ #endif /* LINUX_NFSD_VFS_H */


@@ -151,7 +151,7 @@ struct nfsd3_readres {
     __be32          status;
     struct svc_fh       fh;
     unsigned long       count;
-    int         eof;
+    __u32           eof;
 };
struct nfsd3_writeres { struct nfsd3_writeres {


@@ -273,15 +273,14 @@ struct nfsd4_open_downgrade {
 struct nfsd4_read {
     stateid_t   rd_stateid;     /* request */
     u64     rd_offset;      /* request */
     u32     rd_length;      /* request */
     int     rd_vlen;
-    struct file *rd_filp;
-    bool        rd_tmp_file;
+    struct nfsd_file *rd_nf;
 
     struct svc_rqst *rd_rqstp;      /* response */
-    struct svc_fh * rd_fhp;     /* response */
+    struct svc_fh   *rd_fhp;        /* response */
 };
 
 struct nfsd4_readdir {
@@ -538,8 +537,8 @@ struct nfsd4_copy {
     struct nfs4_client  *cp_clp;
 
-    struct file     *file_src;
-    struct file     *file_dst;
+    struct nfsd_file    *nf_src;
+    struct nfsd_file    *nf_dst;
 
     stateid_t       cp_stateid;


@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
{ {
fsnotify_destroy_marks(&sb->s_fsnotify_marks); fsnotify_destroy_marks(&sb->s_fsnotify_marks);
} }
/* Wait until all marks queued for destruction are destroyed */
extern void fsnotify_wait_marks_destroyed(void);
/* /*
* update the dentry->d_flags of all of inode's children to indicate if inode cares * update the dentry->d_flags of all of inode's children to indicate if inode cares


@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
if (refcount_dec_and_test(&group->refcnt)) if (refcount_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group); fsnotify_final_destroy_group(group);
} }
EXPORT_SYMBOL_GPL(fsnotify_put_group);
/* /*
* Create a new fsnotify_group and hold a reference for the group returned. * Create a new fsnotify_group and hold a reference for the group returned.
@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
return group; return group;
} }
EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on) int fsnotify_fasync(int fd, struct file *file, int on)
{ {


@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
queue_delayed_work(system_unbound_wq, &reaper_work, queue_delayed_work(system_unbound_wq, &reaper_work,
FSNOTIFY_REAPER_DELAY); FSNOTIFY_REAPER_DELAY);
} }
EXPORT_SYMBOL_GPL(fsnotify_put_mark);
/* /*
* Get mark reference when we found the mark via lockless traversal of object * Get mark reference when we found the mark via lockless traversal of object
@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
mutex_unlock(&group->mark_mutex); mutex_unlock(&group->mark_mutex);
fsnotify_free_mark(mark); fsnotify_free_mark(mark);
} }
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
/* /*
* Sorting function for lists of fsnotify marks. * Sorting function for lists of fsnotify marks.
@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
mutex_unlock(&group->mark_mutex); mutex_unlock(&group->mark_mutex);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(fsnotify_add_mark);
/* /*
* Given a list of marks, find the mark associated with given group. If found * Given a list of marks, find the mark associated with given group. If found
@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
spin_unlock(&conn->lock); spin_unlock(&conn->lock);
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(fsnotify_find_mark);
/* Clear any marks in a group with given type mask */ /* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group, void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
mark->group = group; mark->group = group;
WRITE_ONCE(mark->connector, NULL); WRITE_ONCE(mark->connector, NULL);
} }
EXPORT_SYMBOL_GPL(fsnotify_init_mark);
/* /*
* Destroy all marks in destroy_list, waits for SRCU period to finish before * Destroy all marks in destroy_list, waits for SRCU period to finish before
@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
{ {
flush_delayed_work(&reaper_work); flush_delayed_work(&reaper_work);
} }
EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);


@ -1168,6 +1168,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
extern int vfs_setlease(struct file *, long, struct file_lock **, void **); extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
extern int lease_modify(struct file_lock *, int, struct list_head *); extern int lease_modify(struct file_lock *, int, struct list_head *);
struct notifier_block;
extern int lease_register_notifier(struct notifier_block *);
extern void lease_unregister_notifier(struct notifier_block *);
struct files_struct; struct files_struct;
extern void show_fd_locks(struct seq_file *f, extern void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files); struct file *filp, struct files_struct *files);


@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
extern void fsnotify_detach_mark(struct fsnotify_mark *mark); extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
/* free mark */ /* free mark */
extern void fsnotify_free_mark(struct fsnotify_mark *mark); extern void fsnotify_free_mark(struct fsnotify_mark *mark);
/* Wait until all marks queued for destruction are destroyed */
extern void fsnotify_wait_marks_destroyed(void);
/* run all the marks in a group, and clear all of the marks attached to given object type */ /* run all the marks in a group, and clear all of the marks attached to given object type */
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
/* run all the marks in a group, and clear all of the vfsmount marks */ /* run all the marks in a group, and clear all of the vfsmount marks */


@@ -87,6 +87,7 @@ struct cache_detail {
                        int has_died);
     struct cache_head * (*alloc)(void);
+    void            (*flush)(void);
     int         (*match)(struct cache_head *orig, struct cache_head *new);
     void            (*init)(struct cache_head *orig, struct cache_head *new);
     void            (*update)(struct cache_head *orig, struct cache_head *new);
@@ -107,9 +108,9 @@ struct cache_detail {
     /* fields for communication over channel */
     struct list_head    queue;
 
-    atomic_t        readers;    /* how many time is /chennel open */
-    time_t          last_close; /* if no readers, when did last close */
-    time_t          last_warn;  /* when we last warned about no readers */
+    atomic_t        writers;    /* how many time is /channel open */
+    time_t          last_close; /* if no writers, when did last close */
+    time_t          last_warn;  /* when we last warned about no writers */
 
     union {
         struct proc_dir_entry   *procfs;

@@ -42,6 +42,7 @@
 #ifndef SVC_RDMA_H
 #define SVC_RDMA_H
+#include <linux/llist.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/rpc_rdma.h>
@@ -107,8 +108,7 @@ struct svcxprt_rdma {
     struct list_head     sc_read_complete_q;
     struct work_struct   sc_work;
 
-    spinlock_t       sc_recv_lock;
-    struct list_head     sc_recv_ctxts;
+    struct llist_head    sc_recv_ctxts;
 };
 /* sc_flags */
 #define RDMAXPRT_CONN_PENDING   3
@@ -125,6 +125,7 @@ enum {
 #define RPCSVC_MAXPAYLOAD_RDMA  RPCSVC_MAXPAYLOAD
 
 struct svc_rdma_recv_ctxt {
+    struct llist_node   rc_node;
     struct list_head    rc_list;
     struct ib_recv_wr   rc_recv_wr;
     struct ib_cqe       rc_cqe;
@@ -200,7 +201,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);


@@ -26,17 +26,22 @@
 #include <linux/types.h>
 
 /* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
 
 /* defined by RFC3530 */
 #define NFS4_OPAQUE_LIMIT 1024
 
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
 enum cld_command {
     Cld_Create,     /* create a record for this cm_id */
     Cld_Remove,     /* remove record of this cm_id */
     Cld_Check,      /* is this cm_id allowed? */
     Cld_GraceDone,      /* grace period is complete */
-    Cld_GraceStart,
+    Cld_GraceStart,     /* grace start (upload client records) */
+    Cld_GetVersion,     /* query max supported upcall version */
 };
 
 /* representation of long-form NFSv4 client ID */
@ -45,6 +50,17 @@ struct cld_name {
unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
} __attribute__((packed)); } __attribute__((packed));
/* sha256 hash of the kerberos principal */
struct cld_princhash {
__u8 cp_len; /* length of cp_data */
unsigned char cp_data[SHA256_DIGEST_SIZE]; /* hash of principal */
} __attribute__((packed));
struct cld_clntinfo {
struct cld_name cc_name;
struct cld_princhash cc_princhash;
} __attribute__((packed));
/* message struct for communication with userspace */ /* message struct for communication with userspace */
struct cld_msg { struct cld_msg {
__u8 cm_vers; /* upcall version */ __u8 cm_vers; /* upcall version */
@ -54,7 +70,28 @@ struct cld_msg {
union { union {
__s64 cm_gracetime; /* grace period start time */ __s64 cm_gracetime; /* grace period start time */
struct cld_name cm_name; struct cld_name cm_name;
__u8 cm_version; /* for getting max version */
} __attribute__((packed)) cm_u; } __attribute__((packed)) cm_u;
} __attribute__((packed)); } __attribute__((packed));
/* version 2 message can include hash of kerberos principal */
struct cld_msg_v2 {
__u8 cm_vers; /* upcall version */
__u8 cm_cmd; /* upcall command */
__s16 cm_status; /* return code */
__u32 cm_xid; /* transaction id */
union {
struct cld_name cm_name;
__u8 cm_version; /* for getting max version */
struct cld_clntinfo cm_clntinfo; /* name & princ hash */
} __attribute__((packed)) cm_u;
} __attribute__((packed));
struct cld_msg_hdr {
__u8 cm_vers; /* upcall version */
__u8 cm_cmd; /* upcall command */
__s16 cm_status; /* return code */
__u32 cm_xid; /* transaction id */
} __attribute__((packed));
#endif /* !_NFSD_CLD_H */ #endif /* !_NFSD_CLD_H */
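
Note: the version-2 upcall embeds a SHA-256 hash of the client's kerberos principal next to the client name, so the userspace tracking daemon (nfsdcld) has to read a larger, differently laid out message. A stand-alone check of the fixed-size pieces as declared above; the struct definitions are copied from this header, with the kernel's __u8/__s16/__u32 typedefs replaced by equivalent standard C types and GCC/Clang __attribute__((packed)) assumed:

    #include <stdio.h>

    #define SHA256_DIGEST_SIZE 32

    /* sha256 hash of the kerberos principal, as in the header above. */
    struct cld_princhash {
            unsigned char cp_len;                       /* length of cp_data */
            unsigned char cp_data[SHA256_DIGEST_SIZE];  /* hash of principal */
    } __attribute__((packed));

    struct cld_msg_hdr {
            unsigned char  cm_vers;     /* upcall version */
            unsigned char  cm_cmd;      /* upcall command */
            short          cm_status;   /* return code */
            unsigned int   cm_xid;      /* transaction id */
    } __attribute__((packed));

    int main(void)
    {
            /* 33 and 8 bytes respectively: sizes a v2-aware nfsdcld can rely
             * on when sizing its reads from the upcall pipe. */
            printf("cld_princhash: %zu bytes\n", sizeof(struct cld_princhash));
            printf("cld_msg_hdr:   %zu bytes\n", sizeof(struct cld_msg_hdr));
            return 0;
    }
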


@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
     spin_lock(&cache_list_lock);
     cd->nextcheck = 0;
     cd->entries = 0;
-    atomic_set(&cd->readers, 0);
+    atomic_set(&cd->writers, 0);
     cd->last_close = 0;
     cd->last_warn = -1;
     list_add(&cd->others, &cache_list);
@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
         }
         rp->offset = 0;
         rp->q.reader = 1;
-        atomic_inc(&cd->readers);
 
         spin_lock(&queue_lock);
         list_add(&rp->q.list, &cd->queue);
         spin_unlock(&queue_lock);
     }
+    if (filp->f_mode & FMODE_WRITE)
+        atomic_inc(&cd->writers);
     filp->private_data = rp;
     return 0;
 }
@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
         filp->private_data = NULL;
         kfree(rp);
-        cd->last_close = seconds_since_boot();
-        atomic_dec(&cd->readers);
     }
+    if (filp->f_mode & FMODE_WRITE) {
+        atomic_dec(&cd->writers);
+        cd->last_close = seconds_since_boot();
+    }
     module_put(cd->owner);
     return 0;
@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
 
 static bool cache_listeners_exist(struct cache_detail *detail)
 {
-    if (atomic_read(&detail->readers))
+    if (atomic_read(&detail->writers))
         return true;
     if (detail->last_close == 0)
         /* This cache was never opened */
@@ -1520,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
     cd->nextcheck = now;
     cache_flush();
 
+    if (cd->flush)
+        cd->flush();
+
     *ppos += count;
     return count;
 }
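
Note: the channel accounting above now counts writers (openers that can write replies back) rather than readers, the idea presumably being that a process that only reads the channel cannot actually service upcalls; cache_listeners_exist() then keys its warnings and fast failures off that count. A minimal userspace analogue of the bookkeeping, purely illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    #define MODE_READ  0x1
    #define MODE_WRITE 0x2

    static int writers;   /* how many times the channel is open for write */

    static void channel_open(int mode)
    {
            if (mode & MODE_WRITE)
                    writers++;
    }

    static void channel_release(int mode)
    {
            if (mode & MODE_WRITE)
                    writers--;
    }

    /* Only an opener that can write back an answer counts as a listener. */
    static bool listeners_exist(void)
    {
            return writers > 0;
    }

    int main(void)
    {
            channel_open(MODE_READ);
            printf("read-only open, listeners: %d\n", listeners_exist());
            channel_open(MODE_READ | MODE_WRITE);
            printf("read-write open, listeners: %d\n", listeners_exist());
            channel_release(MODE_READ | MODE_WRITE);
            return 0;
    }
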


@ -1233,8 +1233,8 @@ svc_generic_init_request(struct svc_rqst *rqstp,
if (rqstp->rq_vers >= progp->pg_nvers ) if (rqstp->rq_vers >= progp->pg_nvers )
goto err_bad_vers; goto err_bad_vers;
versp = progp->pg_vers[rqstp->rq_vers]; versp = progp->pg_vers[rqstp->rq_vers];
if (!versp) if (!versp)
goto err_bad_vers; goto err_bad_vers;
/* /*


@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll; atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod; atomic_t rdma_stat_sq_prod;
struct workqueue_struct *svc_rdma_wq;
/* /*
* This function implements reading and resetting an atomic_t stat * This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file * variable through read/write to a proc file. Any write to the file
@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
void svc_rdma_cleanup(void) void svc_rdma_cleanup(void)
{ {
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
destroy_workqueue(svc_rdma_wq);
if (svcrdma_table_header) { if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header); unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL; svcrdma_table_header = NULL;
@ -246,10 +243,6 @@ int svc_rdma_init(void)
dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
if (!svc_rdma_wq)
return -ENOMEM;
if (!svcrdma_table_header) if (!svcrdma_table_header)
svcrdma_table_header = svcrdma_table_header =
register_sysctl_table(svcrdma_root_table); register_sysctl_table(svcrdma_root_table);


@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
 void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
     struct svc_rdma_recv_ctxt *ctxt;
+    struct llist_node *node;
 
-    while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
-        list_del(&ctxt->rc_list);
+    while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
+        ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
         svc_rdma_recv_ctxt_destroy(rdma, ctxt);
     }
 }
@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 {
     struct svc_rdma_recv_ctxt *ctxt;
+    struct llist_node *node;
 
-    spin_lock(&rdma->sc_recv_lock);
-    ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
-    if (!ctxt)
+    node = llist_del_first(&rdma->sc_recv_ctxts);
+    if (!node)
         goto out_empty;
-    list_del(&ctxt->rc_list);
-    spin_unlock(&rdma->sc_recv_lock);
+    ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 
 out:
     ctxt->rc_page_count = 0;
     return ctxt;
 
 out_empty:
-    spin_unlock(&rdma->sc_recv_lock);
     ctxt = svc_rdma_recv_ctxt_alloc(rdma);
     if (!ctxt)
         return NULL;
@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
     for (i = 0; i < ctxt->rc_page_count; i++)
         put_page(ctxt->rc_pages[i]);
 
-    if (!ctxt->rc_temp) {
-        spin_lock(&rdma->sc_recv_lock);
-        list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-        spin_unlock(&rdma->sc_recv_lock);
-    } else
+    if (!ctxt->rc_temp)
+        llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+    else
         svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
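
Note: the receive-context pool above moves from a spinlock-protected list to an llist, so returning a context to the pool is a single lock-free push and the pop takes no lock (the kernel's llist_del_first requires that only one thread pops at a time). A rough userspace analogue of that push/pop pair using a C11 atomic head pointer, with the same single-consumer assumption:

    #include <stdatomic.h>
    #include <stdio.h>

    struct node {
            struct node *next;
            int          id;
    };

    static _Atomic(struct node *) free_list;

    /* Lock-free push: safe from any number of producers. */
    static void free_list_add(struct node *n)
    {
            struct node *old = atomic_load(&free_list);
            do {
                    n->next = old;
            } while (!atomic_compare_exchange_weak(&free_list, &old, n));
    }

    /* Pop the most recently freed node; only one consumer may do this. */
    static struct node *free_list_del_first(void)
    {
            struct node *old = atomic_load(&free_list);
            while (old && !atomic_compare_exchange_weak(&free_list, &old, old->next))
                    ;
            return old;
    }

    int main(void)
    {
            struct node a = { .id = 1 }, b = { .id = 2 };
            free_list_add(&a);
            free_list_add(&b);
            for (struct node *n; (n = free_list_del_first()); )
                    printf("got ctxt %d\n", n->id);
            return 0;
    }
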


@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
     INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
     INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
     INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-    INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+    init_llist_head(&cma_xprt->sc_recv_ctxts);
     INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
     init_waitqueue_head(&cma_xprt->sc_send_wait);
 
     spin_lock_init(&cma_xprt->sc_lock);
     spin_lock_init(&cma_xprt->sc_rq_dto_lock);
     spin_lock_init(&cma_xprt->sc_send_lock);
-    spin_lock_init(&cma_xprt->sc_recv_lock);
     spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
 /*
@@ -630,8 +629,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 {
     struct svcxprt_rdma *rdma =
         container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
     INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-    queue_work(svc_rdma_wq, &rdma->sc_work);
+    schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)