alistair23-linux/fs/afs/volume.c

206 lines
5.3 KiB
C
Raw Normal View History

/* AFS volume management
*
* Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include "internal.h"
static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
/*
* lookup a volume by name
* - this can be one of the following:
* "%[cell:]volume[.]" R/W volume
* "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
* or R/W (rwparent=1) volume
* "%[cell:]volume.readonly" R/O volume
* "#[cell:]volume.readonly" R/O volume
* "%[cell:]volume.backup" Backup volume
* "#[cell:]volume.backup" Backup volume
*
* The cell name is optional, and defaults to the current cell.
*
* See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
* Guide
* - Rule 1: Explicit type suffix forces access of that type or nothing
* (no suffix, then use Rule 2 & 3)
* - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
* if not available
* - Rule 3: If parent volume is R/W, then only mount R/W volume unless
* explicitly told otherwise
*/
struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
{
struct afs_vlocation *vlocation = NULL;
struct afs_volume *volume = NULL;
struct afs_server *server = NULL;
char srvtmask;
int ret, loop;
_enter("{%*.*s,%d}",
params->volnamesz, params->volnamesz, params->volname, params->rwpath);
/* lookup the volume location record */
afs: Lay the groundwork for supporting network namespaces Lay the groundwork for supporting network namespaces (netns) to the AFS filesystem by moving various global features to a network-namespace struct (afs_net) and providing an instance of this as a temporary global variable that everything uses via accessor functions for the moment. The following changes have been made: (1) Store the netns in the superblock info. This will be obtained from the mounter's nsproxy on a manual mount and inherited from the parent superblock on an automount. (2) The cell list is made per-netns. It can be viewed through /proc/net/afs/cells and also be modified by writing commands to that file. (3) The local workstation cell is set per-ns in /proc/net/afs/rootcell. This is unset by default. (4) The 'rootcell' module parameter, which sets a cell and VL server list modifies the init net namespace, thereby allowing an AFS root fs to be theoretically used. (5) The volume location lists and the file lock manager are made per-netns. (6) The AF_RXRPC socket and associated I/O bits are made per-ns. The various workqueues remain global for the moment. Changes still to be made: (1) /proc/fs/afs/ should be moved to /proc/net/afs/ and a symlink emplaced from the old name. (2) A per-netns subsys needs to be registered for AFS into which it can store its per-netns data. (3) Rather than the AF_RXRPC socket being opened on module init, it needs to be opened on the creation of a superblock in that netns. (4) The socket needs to be closed when the last superblock using it is destroyed and all outstanding client calls on it have been completed. This prevents a reference loop on the namespace. (5) It is possible that several namespaces will want to use AFS, in which case each one will need its own UDP port. These can either be set through /proc/net/afs/cm_port or the kernel can pick one at random. The init_ns gets 7001 by default. Other issues that need resolving: (1) The DNS keyring needs net-namespacing. (2) Where do upcalls go (eg. DNS request-key upcall)? (3) Need something like open_socket_in_file_ns() syscall so that AFS command line tools attempting to operate on an AFS file/volume have their RPC calls go to the right place. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:45 -06:00
vlocation = afs_vlocation_lookup(params->net, params->cell, params->key,
params->volname, params->volnamesz);
if (IS_ERR(vlocation)) {
ret = PTR_ERR(vlocation);
vlocation = NULL;
goto error;
}
/* make the final decision on the type we want */
ret = -ENOMEDIUM;
if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
goto error;
srvtmask = 0;
for (loop = 0; loop < vlocation->vldb.nservers; loop++)
srvtmask |= vlocation->vldb.srvtmask[loop];
if (params->force) {
if (!(srvtmask & (1 << params->type)))
goto error;
} else if (srvtmask & AFS_VOL_VTM_RO) {
params->type = AFSVL_ROVOL;
} else if (srvtmask & AFS_VOL_VTM_RW) {
params->type = AFSVL_RWVOL;
} else {
goto error;
}
down_write(&params->cell->vl_sem);
/* is the volume already active? */
if (vlocation->vols[params->type]) {
/* yes - re-use it */
volume = vlocation->vols[params->type];
afs_get_volume(volume);
goto success;
}
/* create a new volume record */
_debug("creating new volume record");
ret = -ENOMEM;
volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
if (!volume)
goto error_up;
atomic_set(&volume->usage, 1);
volume->type = params->type;
volume->type_force = params->force;
volume->cell = params->cell;
volume->vid = vlocation->vldb.vid[params->type];
init_rwsem(&volume->server_sem);
/* look up all the applicable server records */
for (loop = 0; loop < 8; loop++) {
if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
server = afs_lookup_server(
volume->cell, &vlocation->vldb.servers[loop]);
if (IS_ERR(server)) {
ret = PTR_ERR(server);
goto error_discard;
}
volume->servers[volume->nservers] = server;
volume->nservers++;
}
}
/* attach the cache and volume location */
#ifdef CONFIG_AFS_FSCACHE
volume->cache = fscache_acquire_cookie(volume->cell->cache,
&afs_volume_cache_index_def,
FS-Cache: Provide the ability to enable/disable cookies Provide the ability to enable and disable fscache cookies. A disabled cookie will reject or ignore further requests to: Acquire a child cookie Invalidate and update backing objects Check the consistency of a backing object Allocate storage for backing page Read backing pages Write to backing pages but still allows: Checks/waits on the completion of already in-progress objects Uncaching of pages Relinquishment of cookies Two new operations are provided: (1) Disable a cookie: void fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate); If the cookie is not already disabled, this locks the cookie against other dis/enablement ops, marks the cookie as being disabled, discards or invalidates any backing objects and waits for cessation of activity on any associated object. This is a wrapper around a chunk split out of fscache_relinquish_cookie(), but it reinitialises the cookie such that it can be reenabled. All possible failures are handled internally. The caller should consider calling fscache_uncache_all_inode_pages() afterwards to make sure all page markings are cleared up. (2) Enable a cookie: void fscache_enable_cookie(struct fscache_cookie *cookie, bool (*can_enable)(void *data), void *data) If the cookie is not already enabled, this locks the cookie against other dis/enablement ops, invokes can_enable() and, if the cookie is not an index cookie, will begin the procedure of acquiring backing objects. The optional can_enable() function is passed the data argument and returns a ruling as to whether or not enablement should actually be permitted to begin. All possible failures are handled internally. The cookie will only be marked as enabled if provisional backing objects are allocated. A later patch will introduce these to NFS. Cookie enablement during nfs_open() is then contingent on i_writecount <= 0. can_enable() checks for a race between open(O_RDONLY) and open(O_WRONLY/O_RDWR). This simplifies NFS's cookie handling and allows us to get rid of open(O_RDONLY) accidentally introducing caching to an inode that's open for writing already. One operation has its API modified: (3) Acquire a cookie. struct fscache_cookie *fscache_acquire_cookie( struct fscache_cookie *parent, const struct fscache_cookie_def *def, void *netfs_data, bool enable); This now has an additional argument that indicates whether the requested cookie should be enabled by default. It doesn't need the can_enable() function because the caller must prevent multiple calls for the same netfs object and it doesn't need to take the enablement lock because no one else can get at the cookie before this returns. Signed-off-by: David Howells <dhowells@redhat.com
2013-09-20 17:09:31 -06:00
volume, true);
#endif
afs_get_vlocation(vlocation);
volume->vlocation = vlocation;
vlocation->vols[volume->type] = volume;
success:
_debug("kAFS selected %s volume %08x",
afs_voltypes[volume->type], volume->vid);
up_write(&params->cell->vl_sem);
afs: Lay the groundwork for supporting network namespaces Lay the groundwork for supporting network namespaces (netns) to the AFS filesystem by moving various global features to a network-namespace struct (afs_net) and providing an instance of this as a temporary global variable that everything uses via accessor functions for the moment. The following changes have been made: (1) Store the netns in the superblock info. This will be obtained from the mounter's nsproxy on a manual mount and inherited from the parent superblock on an automount. (2) The cell list is made per-netns. It can be viewed through /proc/net/afs/cells and also be modified by writing commands to that file. (3) The local workstation cell is set per-ns in /proc/net/afs/rootcell. This is unset by default. (4) The 'rootcell' module parameter, which sets a cell and VL server list modifies the init net namespace, thereby allowing an AFS root fs to be theoretically used. (5) The volume location lists and the file lock manager are made per-netns. (6) The AF_RXRPC socket and associated I/O bits are made per-ns. The various workqueues remain global for the moment. Changes still to be made: (1) /proc/fs/afs/ should be moved to /proc/net/afs/ and a symlink emplaced from the old name. (2) A per-netns subsys needs to be registered for AFS into which it can store its per-netns data. (3) Rather than the AF_RXRPC socket being opened on module init, it needs to be opened on the creation of a superblock in that netns. (4) The socket needs to be closed when the last superblock using it is destroyed and all outstanding client calls on it have been completed. This prevents a reference loop on the namespace. (5) It is possible that several namespaces will want to use AFS, in which case each one will need its own UDP port. These can either be set through /proc/net/afs/cm_port or the kernel can pick one at random. The init_ns gets 7001 by default. Other issues that need resolving: (1) The DNS keyring needs net-namespacing. (2) Where do upcalls go (eg. DNS request-key upcall)? (3) Need something like open_socket_in_file_ns() syscall so that AFS command line tools attempting to operate on an AFS file/volume have their RPC calls go to the right place. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:45 -06:00
afs_put_vlocation(params->net, vlocation);
_leave(" = %p", volume);
return volume;
/* clean up */
error_up:
up_write(&params->cell->vl_sem);
error:
afs: Lay the groundwork for supporting network namespaces Lay the groundwork for supporting network namespaces (netns) to the AFS filesystem by moving various global features to a network-namespace struct (afs_net) and providing an instance of this as a temporary global variable that everything uses via accessor functions for the moment. The following changes have been made: (1) Store the netns in the superblock info. This will be obtained from the mounter's nsproxy on a manual mount and inherited from the parent superblock on an automount. (2) The cell list is made per-netns. It can be viewed through /proc/net/afs/cells and also be modified by writing commands to that file. (3) The local workstation cell is set per-ns in /proc/net/afs/rootcell. This is unset by default. (4) The 'rootcell' module parameter, which sets a cell and VL server list modifies the init net namespace, thereby allowing an AFS root fs to be theoretically used. (5) The volume location lists and the file lock manager are made per-netns. (6) The AF_RXRPC socket and associated I/O bits are made per-ns. The various workqueues remain global for the moment. Changes still to be made: (1) /proc/fs/afs/ should be moved to /proc/net/afs/ and a symlink emplaced from the old name. (2) A per-netns subsys needs to be registered for AFS into which it can store its per-netns data. (3) Rather than the AF_RXRPC socket being opened on module init, it needs to be opened on the creation of a superblock in that netns. (4) The socket needs to be closed when the last superblock using it is destroyed and all outstanding client calls on it have been completed. This prevents a reference loop on the namespace. (5) It is possible that several namespaces will want to use AFS, in which case each one will need its own UDP port. These can either be set through /proc/net/afs/cm_port or the kernel can pick one at random. The init_ns gets 7001 by default. Other issues that need resolving: (1) The DNS keyring needs net-namespacing. (2) Where do upcalls go (eg. DNS request-key upcall)? (3) Need something like open_socket_in_file_ns() syscall so that AFS command line tools attempting to operate on an AFS file/volume have their RPC calls go to the right place. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:45 -06:00
afs_put_vlocation(params->net, vlocation);
_leave(" = %d", ret);
return ERR_PTR(ret);
error_discard:
up_write(&params->cell->vl_sem);
afs: Overhaul the callback handling Overhaul the AFS callback handling by the following means: (1) Don't give up callback promises on vnodes that we are no longer using, rather let them just expire on the server or let the server break them. This is actually more efficient for the server as the callback lookup is expensive if there are lots of extant callbacks. (2) Only give up the callback promises we have from a server when the server record is destroyed. Then we can just give up *all* the callback promises on it in one go. (3) Servers can end up being shared between cells if cells are aliased, so don't add all the vnodes being backed by a particular server into a big FID-indexed tree on that server as there may be duplicates. Instead have each volume instance (~= superblock) register an interest in a server as it starts to make use of it and use this to allow the processor for callbacks from the server to find the superblock and thence the inode corresponding to the FID being broken by means of ilookup_nowait(). (4) Rather than iterating over the entire callback list when a mass-break comes in from the server, maintain a counter of mass-breaks in afs_server (cb_seq) and make afs_validate() check it against the copy in afs_vnode. It would be nice not to have to take a read_lock whilst doing this, but that's tricky without using RCU. (5) Save a ref on the fileserver we're using for a call in the afs_call struct so that we can access its cb_s_break during call decoding. (6) Write-lock around callback and status storage in a vnode and read-lock around getattr so that we don't see the status mid-update. This has the following consequences: (1) Data invalidation isn't seen until someone calls afs_validate() on a vnode. Unfortunately, we need to use a key to query the server, but getting one from a background thread is tricky without caching loads of keys all over the place. (2) Mass invalidation isn't seen until someone calls afs_validate(). (3) Callback breaking is going to hit the inode_hash_lock quite a bit. Could this be replaced with rcu_read_lock() since inodes are destroyed under RCU conditions. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:49 -06:00
for (loop = volume->nservers - 1; loop >= 0; loop--) {
afs_put_cb_interest(params->net, volume->cb_interests[loop]);
afs_put_server(params->net, volume->servers[loop]);
afs: Overhaul the callback handling Overhaul the AFS callback handling by the following means: (1) Don't give up callback promises on vnodes that we are no longer using, rather let them just expire on the server or let the server break them. This is actually more efficient for the server as the callback lookup is expensive if there are lots of extant callbacks. (2) Only give up the callback promises we have from a server when the server record is destroyed. Then we can just give up *all* the callback promises on it in one go. (3) Servers can end up being shared between cells if cells are aliased, so don't add all the vnodes being backed by a particular server into a big FID-indexed tree on that server as there may be duplicates. Instead have each volume instance (~= superblock) register an interest in a server as it starts to make use of it and use this to allow the processor for callbacks from the server to find the superblock and thence the inode corresponding to the FID being broken by means of ilookup_nowait(). (4) Rather than iterating over the entire callback list when a mass-break comes in from the server, maintain a counter of mass-breaks in afs_server (cb_seq) and make afs_validate() check it against the copy in afs_vnode. It would be nice not to have to take a read_lock whilst doing this, but that's tricky without using RCU. (5) Save a ref on the fileserver we're using for a call in the afs_call struct so that we can access its cb_s_break during call decoding. (6) Write-lock around callback and status storage in a vnode and read-lock around getattr so that we don't see the status mid-update. This has the following consequences: (1) Data invalidation isn't seen until someone calls afs_validate() on a vnode. Unfortunately, we need to use a key to query the server, but getting one from a background thread is tricky without caching loads of keys all over the place. (2) Mass invalidation isn't seen until someone calls afs_validate(). (3) Callback breaking is going to hit the inode_hash_lock quite a bit. Could this be replaced with rcu_read_lock() since inodes are destroyed under RCU conditions. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:49 -06:00
}
kfree(volume);
goto error;
}
/*
* destroy a volume record
*/
void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
{
struct afs_vlocation *vlocation;
int loop;
if (!volume)
return;
_enter("%p", volume);
ASSERTCMP(atomic_read(&volume->usage), >, 0);
vlocation = volume->vlocation;
/* to prevent a race, the decrement and the dequeue must be effectively
* atomic */
down_write(&cell->vl_sem);
if (likely(!atomic_dec_and_test(&volume->usage))) {
up_write(&vlocation->cell->vl_sem);
_leave("");
return;
}
vlocation->vols[volume->type] = NULL;
up_write(&cell->vl_sem);
/* finish cleaning up the volume */
#ifdef CONFIG_AFS_FSCACHE
fscache_relinquish_cookie(volume->cache, 0);
#endif
afs_put_vlocation(cell->net, vlocation);
afs: Overhaul the callback handling Overhaul the AFS callback handling by the following means: (1) Don't give up callback promises on vnodes that we are no longer using, rather let them just expire on the server or let the server break them. This is actually more efficient for the server as the callback lookup is expensive if there are lots of extant callbacks. (2) Only give up the callback promises we have from a server when the server record is destroyed. Then we can just give up *all* the callback promises on it in one go. (3) Servers can end up being shared between cells if cells are aliased, so don't add all the vnodes being backed by a particular server into a big FID-indexed tree on that server as there may be duplicates. Instead have each volume instance (~= superblock) register an interest in a server as it starts to make use of it and use this to allow the processor for callbacks from the server to find the superblock and thence the inode corresponding to the FID being broken by means of ilookup_nowait(). (4) Rather than iterating over the entire callback list when a mass-break comes in from the server, maintain a counter of mass-breaks in afs_server (cb_seq) and make afs_validate() check it against the copy in afs_vnode. It would be nice not to have to take a read_lock whilst doing this, but that's tricky without using RCU. (5) Save a ref on the fileserver we're using for a call in the afs_call struct so that we can access its cb_s_break during call decoding. (6) Write-lock around callback and status storage in a vnode and read-lock around getattr so that we don't see the status mid-update. This has the following consequences: (1) Data invalidation isn't seen until someone calls afs_validate() on a vnode. Unfortunately, we need to use a key to query the server, but getting one from a background thread is tricky without caching loads of keys all over the place. (2) Mass invalidation isn't seen until someone calls afs_validate(). (3) Callback breaking is going to hit the inode_hash_lock quite a bit. Could this be replaced with rcu_read_lock() since inodes are destroyed under RCU conditions. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:49 -06:00
for (loop = volume->nservers - 1; loop >= 0; loop--) {
afs_put_cb_interest(cell->net, volume->cb_interests[loop]);
afs_put_server(cell->net, volume->servers[loop]);
afs: Overhaul the callback handling Overhaul the AFS callback handling by the following means: (1) Don't give up callback promises on vnodes that we are no longer using, rather let them just expire on the server or let the server break them. This is actually more efficient for the server as the callback lookup is expensive if there are lots of extant callbacks. (2) Only give up the callback promises we have from a server when the server record is destroyed. Then we can just give up *all* the callback promises on it in one go. (3) Servers can end up being shared between cells if cells are aliased, so don't add all the vnodes being backed by a particular server into a big FID-indexed tree on that server as there may be duplicates. Instead have each volume instance (~= superblock) register an interest in a server as it starts to make use of it and use this to allow the processor for callbacks from the server to find the superblock and thence the inode corresponding to the FID being broken by means of ilookup_nowait(). (4) Rather than iterating over the entire callback list when a mass-break comes in from the server, maintain a counter of mass-breaks in afs_server (cb_seq) and make afs_validate() check it against the copy in afs_vnode. It would be nice not to have to take a read_lock whilst doing this, but that's tricky without using RCU. (5) Save a ref on the fileserver we're using for a call in the afs_call struct so that we can access its cb_s_break during call decoding. (6) Write-lock around callback and status storage in a vnode and read-lock around getattr so that we don't see the status mid-update. This has the following consequences: (1) Data invalidation isn't seen until someone calls afs_validate() on a vnode. Unfortunately, we need to use a key to query the server, but getting one from a background thread is tricky without caching loads of keys all over the place. (2) Mass invalidation isn't seen until someone calls afs_validate(). (3) Callback breaking is going to hit the inode_hash_lock quite a bit. Could this be replaced with rcu_read_lock() since inodes are destroyed under RCU conditions. Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-02 09:27:49 -06:00
}
kfree(volume);
_leave(" [destroyed]");
}