From f7ab093f74bf638ed98fd1115f3efa17e308bb7f Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 17 Jul 2015 10:38:11 -0400 Subject: [PATCH 001/174] Orangefs: kernel client part 1 OrangeFS (formerly PVFS) is an LGPL-licensed userspace networked parallel file system. OrangeFS can be accessed through included system utilities, user integration libraries, MPI-IO, and can be used by the Hadoop ecosystem as an alternative to the HDFS filesystem. OrangeFS is used widely for parallel science, data analytics and engineering applications. While applications often don't require OrangeFS to be mounted into the VFS, users do like to be able to access their files in the normal way. The OrangeFS kernel client allows OrangeFS filesystems to be mounted as a VFS. The kernel client communicates with a userspace daemon which in turn communicates with the OrangeFS server daemons that implement the filesystem. The server daemons (there's almost always more than one) need not be running on the same host as the kernel client. OrangeFS filesystems can also be mounted with FUSE, and we ship code and instructions to facilitate that, but most of our users report preferring to use our kernel module instead. Further, as an example of a problem we can't solve with FUSE, we have in the works a not-yet-ready-for-prime-time version of a file_operations lock function that accounts for the server daemons being distributed across more than one running kernel. Many people and organizations, including Clemson University, Argonne National Laboratory, and Acxiom Corporation, have helped to create what has become OrangeFS over more than twenty years. 
Some of the more recent contributors to the kernel client include: Mike Marshall Christoph Hellwig Randy Martin Becky Ligon Walt Ligon Michael Moore Rob Ross Phil Carnes Signed-off-by: Mike Marshall --- fs/orangefs/downcall.h | 138 ++++++ fs/orangefs/protocol.h | 681 +++++++++++++++++++++++++++ fs/orangefs/pvfs2-bufmap.h | 76 +++ fs/orangefs/pvfs2-debug.h | 290 ++++++++++++ fs/orangefs/pvfs2-debugfs.h | 3 + fs/orangefs/pvfs2-dev-proto.h | 102 ++++ fs/orangefs/pvfs2-kernel.h | 864 ++++++++++++++++++++++++++++++++++ fs/orangefs/pvfs2-sysfs.h | 2 + fs/orangefs/upcall.h | 255 ++++++++++ 9 files changed, 2411 insertions(+) create mode 100644 fs/orangefs/downcall.h create mode 100644 fs/orangefs/protocol.h create mode 100644 fs/orangefs/pvfs2-bufmap.h create mode 100644 fs/orangefs/pvfs2-debug.h create mode 100644 fs/orangefs/pvfs2-debugfs.h create mode 100644 fs/orangefs/pvfs2-dev-proto.h create mode 100644 fs/orangefs/pvfs2-kernel.h create mode 100644 fs/orangefs/pvfs2-sysfs.h create mode 100644 fs/orangefs/upcall.h diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h new file mode 100644 index 000000000000..a79129f875f3 --- /dev/null +++ b/fs/orangefs/downcall.h @@ -0,0 +1,138 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * Definitions of downcalls used in Linux kernel module. 
+ */ + +#ifndef __DOWNCALL_H +#define __DOWNCALL_H + +/* + * Sanitized the device-client core interaction + * for clean 32-64 bit usage + */ +struct pvfs2_io_response { + __s64 amt_complete; +}; + +struct pvfs2_iox_response { + __s64 amt_complete; +}; + +struct pvfs2_lookup_response { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_create_response { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_symlink_response { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_getattr_response { + struct PVFS_sys_attr_s attributes; + char link_target[PVFS2_NAME_LEN]; +}; + +struct pvfs2_mkdir_response { + struct pvfs2_object_kref refn; +}; + +/* + * duplication of some system interface structures so that I don't have + * to allocate extra memory + */ +struct pvfs2_dirent { + char *d_name; + int d_length; + struct pvfs2_khandle khandle; +}; + +struct pvfs2_statfs_response { + __s64 block_size; + __s64 blocks_total; + __s64 blocks_avail; + __s64 files_total; + __s64 files_avail; +}; + +struct pvfs2_fs_mount_response { + __s32 fs_id; + __s32 id; + struct pvfs2_khandle root_khandle; +}; + +/* the getxattr response is the attribute value */ +struct pvfs2_getxattr_response { + __s32 val_sz; + __s32 __pad1; + char val[PVFS_MAX_XATTR_VALUELEN]; +}; + +/* the listxattr response is an array of attribute names */ +struct pvfs2_listxattr_response { + __s32 returned_count; + __s32 __pad1; + __u64 token; + char key[PVFS_MAX_XATTR_LISTLEN * PVFS_MAX_XATTR_NAMELEN]; + __s32 keylen; + __s32 __pad2; + __s32 lengths[PVFS_MAX_XATTR_LISTLEN]; +}; + +struct pvfs2_param_response { + __s64 value; +}; + +#define PERF_COUNT_BUF_SIZE 4096 +struct pvfs2_perf_count_response { + char buffer[PERF_COUNT_BUF_SIZE]; +}; + +#define FS_KEY_BUF_SIZE 4096 +struct pvfs2_fs_key_response { + __s32 fs_keylen; + __s32 __pad1; + char fs_key[FS_KEY_BUF_SIZE]; +}; + +struct pvfs2_downcall_s { + __s32 type; + __s32 status; + /* currently trailer is used only by readdir */ + __s64 trailer_size; + char * 
trailer_buf; + + union { + struct pvfs2_io_response io; + struct pvfs2_iox_response iox; + struct pvfs2_lookup_response lookup; + struct pvfs2_create_response create; + struct pvfs2_symlink_response sym; + struct pvfs2_getattr_response getattr; + struct pvfs2_mkdir_response mkdir; + struct pvfs2_statfs_response statfs; + struct pvfs2_fs_mount_response fs_mount; + struct pvfs2_getxattr_response getxattr; + struct pvfs2_listxattr_response listxattr; + struct pvfs2_param_response param; + struct pvfs2_perf_count_response perf_count; + struct pvfs2_fs_key_response fs_key; + } resp; +}; + +struct pvfs2_readdir_response_s { + __u64 token; + __u64 directory_version; + __u32 __pad2; + __u32 pvfs_dirent_outcount; + struct pvfs2_dirent *dirent_array; +}; + +#endif /* __DOWNCALL_H */ diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h new file mode 100644 index 000000000000..2fb3a63ae9ab --- /dev/null +++ b/fs/orangefs/protocol.h @@ -0,0 +1,681 @@ +#include +#include +#include + +extern struct client_debug_mask *cdm_array; +extern char *debug_help_string; +extern int help_string_initialized; +extern struct dentry *debug_dir; +extern struct dentry *help_file_dentry; +extern struct dentry *client_debug_dentry; +extern const struct file_operations debug_help_fops; +extern int client_all_index; +extern int client_verbose_index; +extern int cdm_element_count; +#define DEBUG_HELP_STRING_SIZE 4096 +#define HELP_STRING_UNINITIALIZED \ + "Client Debug Keywords are unknown until the first time\n" \ + "the client is started after boot.\n" +#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help" +#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug" +#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug" +#define PVFS2_VERBOSE "verbose" +#define PVFS2_ALL "all" + +/* pvfs2-config.h ***********************************************************/ +#define PVFS2_VERSION_MAJOR 2 +#define PVFS2_VERSION_MINOR 9 +#define PVFS2_VERSION_SUB 0 + +/* khandle stuff 
***********************************************************/ + +/* + * The 2.9 core will put 64 bit handles in here like this: + * 1234 0000 0000 5678 + * The 3.0 and beyond cores will put 128 bit handles in here like this: + * 1234 5678 90AB CDEF + * The kernel module will always use the first four bytes and + * the last four bytes as an inum. + */ +struct pvfs2_khandle { + unsigned char u[16]; +} __aligned(8); + +/* + * kernel version of an object ref. + */ +struct pvfs2_object_kref { + struct pvfs2_khandle khandle; + __s32 fs_id; + __s32 __pad1; +}; + +/* + * compare 2 khandles assumes little endian thus from large address to + * small address + */ +static inline int PVFS_khandle_cmp(const struct pvfs2_khandle *kh1, + const struct pvfs2_khandle *kh2) +{ + int i; + + for (i = 15; i >= 0; i--) { + if (kh1->u[i] > kh2->u[i]) + return 1; + if (kh1->u[i] < kh2->u[i]) + return -1; + } + + return 0; +} + +/* copy a khandle to a field of arbitrary size */ +static inline void PVFS_khandle_to(const struct pvfs2_khandle *kh, + void *p, int size) +{ + int i; + unsigned char *c = p; + + memset(p, 0, size); + + for (i = 0; i < 16 && i < size; i++) + c[i] = kh->u[i]; +} + +/* copy a khandle from a field of arbitrary size */ +static inline void PVFS_khandle_from(struct pvfs2_khandle *kh, + void *p, int size) +{ + int i; + unsigned char *c = p; + + memset(kh, 0, 16); + + for (i = 0; i < 16 && i < size; i++) + kh->u[i] = c[i]; +} + +/* pvfs2-types.h ************************************************************/ +typedef __u32 PVFS_uid; +typedef __u32 PVFS_gid; +typedef __s32 PVFS_fs_id; +typedef __u32 PVFS_permissions; +typedef __u64 PVFS_time; +typedef __s64 PVFS_size; +typedef __u64 PVFS_flags; +typedef __u64 PVFS_ds_position; +typedef __s32 PVFS_error; +typedef __s64 PVFS_offset; + +#define PVFS2_SUPER_MAGIC 0x20030528 +#define PVFS_ERROR_BIT (1 << 30) +#define PVFS_NON_ERRNO_ERROR_BIT (1 << 29) +#define IS_PVFS_ERROR(__error) ((__error)&(PVFS_ERROR_BIT)) +#define 
IS_PVFS_NON_ERRNO_ERROR(__error) \ +(((__error)&(PVFS_NON_ERRNO_ERROR_BIT)) && IS_PVFS_ERROR(__error)) +#define PVFS_ERROR_TO_ERRNO(__error) PVFS_get_errno_mapping(__error) + +/* 7 bits are used for the errno mapped error codes */ +#define PVFS_ERROR_CODE(__error) \ +((__error) & (__s32)(0x7f|PVFS_ERROR_BIT)) +#define PVFS_ERROR_CLASS(__error) \ +((__error) & ~((__s32)(0x7f|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT))) +#define PVFS_NON_ERRNO_ERROR_CODE(__error) \ +((__error) & (__s32)(127|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT)) + +/* PVFS2 error codes, compliments of asm/errno.h */ +#define PVFS_EPERM E(1) /* Operation not permitted */ +#define PVFS_ENOENT E(2) /* No such file or directory */ +#define PVFS_EINTR E(3) /* Interrupted system call */ +#define PVFS_EIO E(4) /* I/O error */ +#define PVFS_ENXIO E(5) /* No such device or address */ +#define PVFS_EBADF E(6) /* Bad file number */ +#define PVFS_EAGAIN E(7) /* Try again */ +#define PVFS_ENOMEM E(8) /* Out of memory */ +#define PVFS_EFAULT E(9) /* Bad address */ +#define PVFS_EBUSY E(10) /* Device or resource busy */ +#define PVFS_EEXIST E(11) /* File exists */ +#define PVFS_ENODEV E(12) /* No such device */ +#define PVFS_ENOTDIR E(13) /* Not a directory */ +#define PVFS_EISDIR E(14) /* Is a directory */ +#define PVFS_EINVAL E(15) /* Invalid argument */ +#define PVFS_EMFILE E(16) /* Too many open files */ +#define PVFS_EFBIG E(17) /* File too large */ +#define PVFS_ENOSPC E(18) /* No space left on device */ +#define PVFS_EROFS E(19) /* Read-only file system */ +#define PVFS_EMLINK E(20) /* Too many links */ +#define PVFS_EPIPE E(21) /* Broken pipe */ +#define PVFS_EDEADLK E(22) /* Resource deadlock would occur */ +#define PVFS_ENAMETOOLONG E(23) /* File name too long */ +#define PVFS_ENOLCK E(24) /* No record locks available */ +#define PVFS_ENOSYS E(25) /* Function not implemented */ +#define PVFS_ENOTEMPTY E(26) /* Directory not empty */ + /* +#define PVFS_ELOOP E(27) * Too many symbolic links encountered + 
*/ +#define PVFS_EWOULDBLOCK E(28) /* Operation would block */ +#define PVFS_ENOMSG E(29) /* No message of desired type */ +#define PVFS_EUNATCH E(30) /* Protocol driver not attached */ +#define PVFS_EBADR E(31) /* Invalid request descriptor */ +#define PVFS_EDEADLOCK E(32) +#define PVFS_ENODATA E(33) /* No data available */ +#define PVFS_ETIME E(34) /* Timer expired */ +#define PVFS_ENONET E(35) /* Machine is not on the network */ +#define PVFS_EREMOTE E(36) /* Object is remote */ +#define PVFS_ECOMM E(37) /* Communication error on send */ +#define PVFS_EPROTO E(38) /* Protocol error */ +#define PVFS_EBADMSG E(39) /* Not a data message */ + /* +#define PVFS_EOVERFLOW E(40) * Value too large for defined data + * type + */ + /* +#define PVFS_ERESTART E(41) * Interrupted system call should be + * restarted + */ +#define PVFS_EMSGSIZE E(42) /* Message too long */ +#define PVFS_EPROTOTYPE E(43) /* Protocol wrong type for socket */ +#define PVFS_ENOPROTOOPT E(44) /* Protocol not available */ +#define PVFS_EPROTONOSUPPORT E(45) /* Protocol not supported */ + /* +#define PVFS_EOPNOTSUPP E(46) * Operation not supported on transport + * endpoint + */ +#define PVFS_EADDRINUSE E(47) /* Address already in use */ +#define PVFS_EADDRNOTAVAIL E(48) /* Cannot assign requested address */ +#define PVFS_ENETDOWN E(49) /* Network is down */ +#define PVFS_ENETUNREACH E(50) /* Network is unreachable */ + /* +#define PVFS_ENETRESET E(51) * Network dropped connection because + * of reset + */ +#define PVFS_ENOBUFS E(52) /* No buffer space available */ +#define PVFS_ETIMEDOUT E(53) /* Connection timed out */ +#define PVFS_ECONNREFUSED E(54) /* Connection refused */ +#define PVFS_EHOSTDOWN E(55) /* Host is down */ +#define PVFS_EHOSTUNREACH E(56) /* No route to host */ +#define PVFS_EALREADY E(57) /* Operation already in progress */ +#define PVFS_EACCES E(58) /* Access not allowed */ +#define PVFS_ECONNRESET E(59) /* Connection reset by peer */ +#define PVFS_ERANGE E(60) /* Math out of 
range or buf too small */ + +/***************** non-errno/pvfs2 specific error codes *****************/ +#define PVFS_ECANCEL (1|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EDEVINIT (2|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EDETAIL (3|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EHOSTNTFD (4|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EADDRNTFD (5|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ENORECVR (6|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ETRYAGAIN (7|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ENOTPVFS (8|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ESECURITY (9|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) + +/* + * NOTE: PLEASE DO NOT ARBITRARILY ADD NEW ERRNO ERROR CODES! + * + * IF YOU CHOOSE TO ADD A NEW ERROR CODE (DESPITE OUR PLEA), YOU ALSO + * NEED TO INCREMENT PVFS_ERRNO MAX (BELOW) AND ADD A MAPPING TO A + * UNIX ERRNO VALUE IN THE MACROS BELOW (USED IN + * src/common/misc/errno-mapping.c and the kernel module) + */ +#define PVFS_ERRNO_MAX 61 + +#define PVFS_ERROR_BMI (1 << 7) /* BMI-specific error */ +#define PVFS_ERROR_TROVE (2 << 7) /* Trove-specific error */ +#define PVFS_ERROR_FLOW (3 << 7) +#define PVFS_ERROR_SM (4 << 7) /* state machine specific error */ +#define PVFS_ERROR_SCHED (5 << 7) +#define PVFS_ERROR_CLIENT (6 << 7) +#define PVFS_ERROR_DEV (7 << 7) /* device file interaction */ + +#define PVFS_ERROR_CLASS_BITS \ + (PVFS_ERROR_BMI | \ + PVFS_ERROR_TROVE | \ + PVFS_ERROR_FLOW | \ + PVFS_ERROR_SM | \ + PVFS_ERROR_SCHED | \ + PVFS_ERROR_CLIENT | \ + PVFS_ERROR_DEV) + +#define DECLARE_ERRNO_MAPPING() \ +__s32 PINT_errno_mapping[PVFS_ERRNO_MAX + 1] = { \ + 0, /* leave this one empty */ \ + EPERM, /* 1 */ \ + ENOENT, \ + EINTR, \ + EIO, \ + ENXIO, \ + EBADF, \ + EAGAIN, \ + ENOMEM, \ + EFAULT, \ + EBUSY, /* 10 */ \ + EEXIST, \ + ENODEV, \ + ENOTDIR, \ + EISDIR, \ + EINVAL, \ + EMFILE, \ + EFBIG, \ + ENOSPC, \ + EROFS, \ + EMLINK, /* 20 
*/ \ + EPIPE, \ + EDEADLK, \ + ENAMETOOLONG, \ + ENOLCK, \ + ENOSYS, \ + ENOTEMPTY, \ + ELOOP, \ + EWOULDBLOCK, \ + ENOMSG, \ + EUNATCH, /* 30 */ \ + EBADR, \ + EDEADLOCK, \ + ENODATA, \ + ETIME, \ + ENONET, \ + EREMOTE, \ + ECOMM, \ + EPROTO, \ + EBADMSG, \ + EOVERFLOW, /* 40 */ \ + ERESTART, \ + EMSGSIZE, \ + EPROTOTYPE, \ + ENOPROTOOPT, \ + EPROTONOSUPPORT, \ + EOPNOTSUPP, \ + EADDRINUSE, \ + EADDRNOTAVAIL, \ + ENETDOWN, \ + ENETUNREACH, /* 50 */ \ + ENETRESET, \ + ENOBUFS, \ + ETIMEDOUT, \ + ECONNREFUSED, \ + EHOSTDOWN, \ + EHOSTUNREACH, \ + EALREADY, \ + EACCES, \ + ECONNRESET, /* 59 */ \ + ERANGE, \ + 0 /* PVFS_ERRNO_MAX */ \ +}; \ +const char *PINT_non_errno_strerror_mapping[] = { \ + "Success", /* 0 */ \ + "Operation cancelled (possibly due to timeout)", \ + "Device initialization failed", \ + "Detailed per-server errors are available", \ + "Unknown host", \ + "No address associated with name", \ + "Unknown server error", \ + "Host name lookup failure", \ + "Path contains non-PVFS elements", \ + "Security error", \ +}; \ +__s32 PINT_non_errno_mapping[] = { \ + 0, /* leave this one empty */ \ + PVFS_ECANCEL, /* 1 */ \ + PVFS_EDEVINIT, /* 2 */ \ + PVFS_EDETAIL, /* 3 */ \ + PVFS_EHOSTNTFD, /* 4 */ \ + PVFS_EADDRNTFD, /* 5 */ \ + PVFS_ENORECVR, /* 6 */ \ + PVFS_ETRYAGAIN, /* 7 */ \ + PVFS_ENOTPVFS, /* 8 */ \ + PVFS_ESECURITY, /* 9 */ \ +} + +/* + * NOTE: PVFS_get_errno_mapping will convert a PVFS_ERROR_CODE to an + * errno value. If the error code is a pvfs2 specific error code + * (i.e. a PVFS_NON_ERRNO_ERROR_CODE), PVFS_get_errno_mapping will + * return an index into the PINT_non_errno_strerror_mapping array which + * can be used for getting the pvfs2 specific strerror message given + * the error code. if the value is not a recognized error code, the + * passed in value will be returned unchanged. 
+ */ +#define DECLARE_ERRNO_MAPPING_AND_FN() \ +extern __s32 PINT_errno_mapping[]; \ +extern __s32 PINT_non_errno_mapping[]; \ +extern const char *PINT_non_errno_strerror_mapping[]; \ +__s32 PVFS_get_errno_mapping(__s32 error) \ +{ \ + __s32 ret = error, mask = 0; \ + __s32 positive = ((error > -1) ? 1 : 0); \ + if (IS_PVFS_NON_ERRNO_ERROR((positive ? error : -error))) { \ + mask = (PVFS_NON_ERRNO_ERROR_BIT | \ + PVFS_ERROR_BIT | \ + PVFS_ERROR_CLASS_BITS); \ + ret = PVFS_NON_ERRNO_ERROR_CODE(((positive ? \ + error : \ + abs(error))) & \ + ~mask); \ + } \ + else if (IS_PVFS_ERROR((positive ? error : -error))) { \ + mask = (PVFS_ERROR_BIT | \ + PVFS_ERROR_CLASS_BITS); \ + ret = PINT_errno_mapping[PVFS_ERROR_CODE(((positive ? \ + error : \ + abs(error))) & \ + ~mask)]; \ + } \ + return ret; \ +} \ +__s32 PVFS_errno_to_error(int err) \ +{ \ + __s32 e = 0; \ + \ + for (; e < PVFS_ERRNO_MAX; ++e) \ + if (PINT_errno_mapping[e] == err) \ + return e | PVFS_ERROR_BIT; \ + \ + return err; \ +} \ +DECLARE_ERRNO_MAPPING() + +/* permission bits */ +#define PVFS_O_EXECUTE (1 << 0) +#define PVFS_O_WRITE (1 << 1) +#define PVFS_O_READ (1 << 2) +#define PVFS_G_EXECUTE (1 << 3) +#define PVFS_G_WRITE (1 << 4) +#define PVFS_G_READ (1 << 5) +#define PVFS_U_EXECUTE (1 << 6) +#define PVFS_U_WRITE (1 << 7) +#define PVFS_U_READ (1 << 8) +/* no PVFS_U_VTX (sticky bit) */ +#define PVFS_G_SGID (1 << 10) +#define PVFS_U_SUID (1 << 11) + +/* definition taken from stdint.h */ +#define INT32_MAX (2147483647) +#define PVFS_ITERATE_START (INT32_MAX - 1) +#define PVFS_ITERATE_END (INT32_MAX - 2) +#define PVFS_READDIR_START PVFS_ITERATE_START +#define PVFS_READDIR_END PVFS_ITERATE_END +#define PVFS_IMMUTABLE_FL FS_IMMUTABLE_FL +#define PVFS_APPEND_FL FS_APPEND_FL +#define PVFS_NOATIME_FL FS_NOATIME_FL +#define PVFS_MIRROR_FL 0x01000000ULL +#define PVFS_O_EXECUTE (1 << 0) +#define PVFS_FS_ID_NULL ((__s32)0) +#define PVFS_ATTR_SYS_UID (1 << 0) +#define PVFS_ATTR_SYS_GID (1 << 1) +#define 
PVFS_ATTR_SYS_PERM (1 << 2) +#define PVFS_ATTR_SYS_ATIME (1 << 3) +#define PVFS_ATTR_SYS_CTIME (1 << 4) +#define PVFS_ATTR_SYS_MTIME (1 << 5) +#define PVFS_ATTR_SYS_TYPE (1 << 6) +#define PVFS_ATTR_SYS_ATIME_SET (1 << 7) +#define PVFS_ATTR_SYS_MTIME_SET (1 << 8) +#define PVFS_ATTR_SYS_SIZE (1 << 20) +#define PVFS_ATTR_SYS_LNK_TARGET (1 << 24) +#define PVFS_ATTR_SYS_DFILE_COUNT (1 << 25) +#define PVFS_ATTR_SYS_DIRENT_COUNT (1 << 26) +#define PVFS_ATTR_SYS_BLKSIZE (1 << 28) +#define PVFS_ATTR_SYS_MIRROR_COPIES_COUNT (1 << 29) +#define PVFS_ATTR_SYS_COMMON_ALL \ + (PVFS_ATTR_SYS_UID | \ + PVFS_ATTR_SYS_GID | \ + PVFS_ATTR_SYS_PERM | \ + PVFS_ATTR_SYS_ATIME | \ + PVFS_ATTR_SYS_CTIME | \ + PVFS_ATTR_SYS_MTIME | \ + PVFS_ATTR_SYS_TYPE) + +#define PVFS_ATTR_SYS_ALL_SETABLE \ +(PVFS_ATTR_SYS_COMMON_ALL-PVFS_ATTR_SYS_TYPE) + +#define PVFS_ATTR_SYS_ALL_NOHINT \ + (PVFS_ATTR_SYS_COMMON_ALL | \ + PVFS_ATTR_SYS_SIZE | \ + PVFS_ATTR_SYS_LNK_TARGET | \ + PVFS_ATTR_SYS_DFILE_COUNT | \ + PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + PVFS_ATTR_SYS_DIRENT_COUNT | \ + PVFS_ATTR_SYS_BLKSIZE) +#define PVFS_XATTR_REPLACE 0x2 +#define PVFS_XATTR_CREATE 0x1 +#define PVFS_MAX_SERVER_ADDR_LEN 256 +#define PVFS_NAME_MAX 256 +/* + * max extended attribute name len as imposed by the VFS and exploited for the + * upcall request types. + * NOTE: Please retain them as multiples of 8 even if you wish to change them + * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit + * kernel. Due to implementation within DBPF, this really needs to be + * PVFS_NAME_MAX, which it was the same value as, but no reason to let it + * break if that changes in the future. 
+ */ +#define PVFS_MAX_XATTR_NAMELEN PVFS_NAME_MAX /* Not the same as + * XATTR_NAME_MAX defined + * by <linux/limits.h> + */ +#define PVFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX + * defined by <linux/limits.h> + */ +#define PVFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX + * defined by <linux/limits.h> + */ +/* + * PVFS I/O operation types, used in both system and server interfaces. + */ +enum PVFS_io_type { + PVFS_IO_READ = 1, + PVFS_IO_WRITE = 2 +}; + +/* + * If this enum is modified the server parameters related to the precreate pool + * batch and low threshold sizes may need to be modified to reflect this + * change. + */ +enum pvfs2_ds_type { + PVFS_TYPE_NONE = 0, + PVFS_TYPE_METAFILE = (1 << 0), + PVFS_TYPE_DATAFILE = (1 << 1), + PVFS_TYPE_DIRECTORY = (1 << 2), + PVFS_TYPE_SYMLINK = (1 << 3), + PVFS_TYPE_DIRDATA = (1 << 4), + PVFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ +}; + +/* + * PVFS_certificate simply stores a buffer with the buffer size. + * The buffer can be converted to an OpenSSL X509 struct for use. + */ +struct PVFS_certificate { + __u32 buf_size; + unsigned char *buf; +}; + +/* + * A credential identifies a user and is signed by the client/user + * private key. 
+ */ +struct PVFS_credential { + __u32 userid; /* user id */ + __u32 num_groups; /* length of group_array */ + __u32 *group_array; /* groups for which the user is a member */ + char *issuer; /* alias of the issuing server */ + __u64 timeout; /* seconds after epoch to time out */ + __u32 sig_size; /* length of the signature in bytes */ + unsigned char *signature; /* digital signature */ + struct PVFS_certificate certificate; /* user certificate buffer */ +}; +#define extra_size_PVFS_credential (PVFS_REQ_LIMIT_GROUPS * \ + sizeof(__u32) + \ + PVFS_REQ_LIMIT_ISSUER + \ + PVFS_REQ_LIMIT_SIGNATURE + \ + extra_size_PVFS_certificate) + +/* This structure is used by the VFS-client interaction alone */ +struct PVFS_keyval_pair { + char key[PVFS_MAX_XATTR_NAMELEN]; + __s32 key_sz; /* __s32 for portable, fixed-size structures */ + __s32 val_sz; + char val[PVFS_MAX_XATTR_VALUELEN]; +}; + +/* pvfs2-sysint.h ***********************************************************/ +/* Describes attributes for a file, directory, or symlink. 
*/ +struct PVFS_sys_attr_s { + __u32 owner; + __u32 group; + __u32 perms; + __u64 atime; + __u64 mtime; + __u64 ctime; + __s64 size; + + /* NOTE: caller must free if valid */ + char *link_target; + + /* Changed to __s32 so that size of structure does not change */ + __s32 dfile_count; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_servers_initial; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_servers_max; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_split_size; + + __u32 mirror_copies_count; + + /* NOTE: caller must free if valid */ + char *dist_name; + + /* NOTE: caller must free if valid */ + char *dist_params; + + __s64 dirent_count; + enum pvfs2_ds_type objtype; + __u64 flags; + __u32 mask; + __s64 blksize; +}; + +#define PVFS2_LOOKUP_LINK_NO_FOLLOW 0 +#define PVFS2_LOOKUP_LINK_FOLLOW 1 + +/* pint-dev.h ***************************************************************/ + +/* parameter structure used in PVFS_DEV_DEBUG ioctl command */ +struct dev_mask_info_s { + enum { + KERNEL_MASK, + CLIENT_MASK, + } mask_type; + __u64 mask_value; +}; + +struct dev_mask2_info_s { + __u64 mask1_value; + __u64 mask2_value; +}; + +/* pvfs2-util.h *************************************************************/ +#define PVFS_util_min(x1, x2) (((x1) > (x2)) ? 
(x2) : (x1)) +__s32 PVFS_util_translate_mode(int mode); + +/* pvfs2-debug.h ************************************************************/ +#include "pvfs2-debug.h" + +/* pvfs2-internal.h *********************************************************/ +#define llu(x) (unsigned long long)(x) +#define lld(x) (long long)(x) + +/* pint-dev-shared.h ********************************************************/ +#define PVFS_DEV_MAGIC 'k' + +#define PVFS2_READDIR_DEFAULT_DESC_COUNT 5 + +#define DEV_GET_MAGIC 0x1 +#define DEV_GET_MAX_UPSIZE 0x2 +#define DEV_GET_MAX_DOWNSIZE 0x3 +#define DEV_MAP 0x4 +#define DEV_REMOUNT_ALL 0x5 +#define DEV_DEBUG 0x6 +#define DEV_UPSTREAM 0x7 +#define DEV_CLIENT_MASK 0x8 +#define DEV_CLIENT_STRING 0x9 +#define DEV_MAX_NR 0xa + +/* supported ioctls, codes are with respect to user-space */ +enum { + PVFS_DEV_GET_MAGIC = _IOW(PVFS_DEV_MAGIC, DEV_GET_MAGIC, __s32), + PVFS_DEV_GET_MAX_UPSIZE = + _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32), + PVFS_DEV_GET_MAX_DOWNSIZE = + _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32), + PVFS_DEV_MAP = _IO(PVFS_DEV_MAGIC, DEV_MAP), + PVFS_DEV_REMOUNT_ALL = _IO(PVFS_DEV_MAGIC, DEV_REMOUNT_ALL), + PVFS_DEV_DEBUG = _IOR(PVFS_DEV_MAGIC, DEV_DEBUG, __s32), + PVFS_DEV_UPSTREAM = _IOW(PVFS_DEV_MAGIC, DEV_UPSTREAM, int), + PVFS_DEV_CLIENT_MASK = _IOW(PVFS_DEV_MAGIC, + DEV_CLIENT_MASK, + struct dev_mask2_info_s), + PVFS_DEV_CLIENT_STRING = _IOW(PVFS_DEV_MAGIC, + DEV_CLIENT_STRING, + char *), + PVFS_DEV_MAXNR = DEV_MAX_NR, +}; + +/* + * version number for use in communicating between kernel space and user + * space + */ +/* +#define PVFS_KERNEL_PROTO_VERSION \ + ((PVFS2_VERSION_MAJOR * 10000) + \ + (PVFS2_VERSION_MINOR * 100) + \ + PVFS2_VERSION_SUB) +*/ +#define PVFS_KERNEL_PROTO_VERSION 0 + +/* + * describes memory regions to map in the PVFS_DEV_MAP ioctl. + * NOTE: See devpvfs2-req.c for 32 bit compat structure. 
+ * Since this structure has a variable-sized layout that is different + * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout + * on such systems before servicing ioctl calls from user-space binaries + * that may be 32 bit! + */ +struct PVFS_dev_map_desc { + void *ptr; + __s32 total_size; + __s32 size; + __s32 count; +}; + +/* gossip.h *****************************************************************/ + +#ifdef GOSSIP_DISABLE_DEBUG +#define gossip_debug(mask, format, f...) do {} while (0) +#else +extern __u64 gossip_debug_mask; +extern struct client_debug_mask client_debug_mask; + +/* try to avoid function call overhead by checking masks in macro */ +#define gossip_debug(mask, format, f...) \ +do { \ + if (gossip_debug_mask & mask) \ + printk(format, ##f); \ +} while (0) +#endif /* GOSSIP_DISABLE_DEBUG */ + +/* do file and line number printouts w/ the GNU preprocessor */ +#define gossip_ldebug(mask, format, f...) \ + gossip_debug(mask, "%s: " format, __func__, ##f) + +#define gossip_err printk +#define gossip_lerr(format, f...) \ + gossip_err("%s line %d: " format, \ + __FILE__, \ + __LINE__, \ + ##f) diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/pvfs2-bufmap.h new file mode 100644 index 000000000000..e269deafbb74 --- /dev/null +++ b/fs/orangefs/pvfs2-bufmap.h @@ -0,0 +1,76 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef __PVFS2_BUFMAP_H +#define __PVFS2_BUFMAP_H + +/* used to describe mapped buffers */ +struct pvfs_bufmap_desc { + void *uaddr; /* user space address pointer */ + struct page **page_array; /* array of mapped pages */ + int array_count; /* size of above arrays */ + struct list_head list_link; +}; + +struct pvfs2_bufmap; + +struct pvfs2_bufmap *pvfs2_bufmap_ref(void); +void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap); + +/* + * pvfs_bufmap_size_query is now an inline function because buffer + * sizes are not hardcoded + */ +int pvfs_bufmap_size_query(void); + +int pvfs_bufmap_shift_query(void); + +int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc); + +int get_bufmap_init(void); + +void pvfs_bufmap_finalize(void); + +int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index); + +void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index); + +int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index); + +void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index); + +int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +size_t pvfs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, + struct iovec *iovec, + unsigned long nr_segs, + struct pvfs2_bufmap *bufmap, + int buffer_index, + size_t bytes_to_be_copied); + +#endif /* __PVFS2_BUFMAP_H */ diff --git a/fs/orangefs/pvfs2-debug.h b/fs/orangefs/pvfs2-debug.h new file mode 100644 index 
000000000000..4c27ad77fa16 --- /dev/null +++ b/fs/orangefs/pvfs2-debug.h @@ -0,0 +1,290 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* This file just defines debugging masks to be used with the gossip + * logging utility. All debugging masks for PVFS2 are kept here to make + * sure we don't have collisions. + */ + +#ifndef __PVFS2_DEBUG_H +#define __PVFS2_DEBUG_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#define GOSSIP_NO_DEBUG (__u64)0 +#define GOSSIP_BMI_DEBUG_TCP ((__u64)1 << 0) +#define GOSSIP_BMI_DEBUG_CONTROL ((__u64)1 << 1) +#define GOSSIP_BMI_DEBUG_OFFSETS ((__u64)1 << 2) +#define GOSSIP_BMI_DEBUG_GM ((__u64)1 << 3) +#define GOSSIP_JOB_DEBUG ((__u64)1 << 4) +#define GOSSIP_SERVER_DEBUG ((__u64)1 << 5) +#define GOSSIP_STO_DEBUG_CTRL ((__u64)1 << 6) +#define GOSSIP_STO_DEBUG_DEFAULT ((__u64)1 << 7) +#define GOSSIP_FLOW_DEBUG ((__u64)1 << 8) +#define GOSSIP_BMI_DEBUG_GM_MEM ((__u64)1 << 9) +#define GOSSIP_REQUEST_DEBUG ((__u64)1 << 10) +#define GOSSIP_FLOW_PROTO_DEBUG ((__u64)1 << 11) +#define GOSSIP_NCACHE_DEBUG ((__u64)1 << 12) +#define GOSSIP_CLIENT_DEBUG ((__u64)1 << 13) +#define GOSSIP_REQ_SCHED_DEBUG ((__u64)1 << 14) +#define GOSSIP_ACACHE_DEBUG ((__u64)1 << 15) +#define GOSSIP_TROVE_DEBUG ((__u64)1 << 16) +#define GOSSIP_TROVE_OP_DEBUG ((__u64)1 << 17) +#define GOSSIP_DIST_DEBUG ((__u64)1 << 18) +#define GOSSIP_BMI_DEBUG_IB ((__u64)1 << 19) +#define GOSSIP_DBPF_ATTRCACHE_DEBUG ((__u64)1 << 20) +#define GOSSIP_MMAP_RCACHE_DEBUG ((__u64)1 << 21) +#define GOSSIP_LOOKUP_DEBUG ((__u64)1 << 22) +#define GOSSIP_REMOVE_DEBUG ((__u64)1 << 23) +#define GOSSIP_GETATTR_DEBUG ((__u64)1 << 24) +#define GOSSIP_READDIR_DEBUG ((__u64)1 << 25) +#define GOSSIP_IO_DEBUG ((__u64)1 << 26) +#define GOSSIP_DBPF_OPEN_CACHE_DEBUG ((__u64)1 << 27) +#define GOSSIP_PERMISSIONS_DEBUG ((__u64)1 << 28) +#define GOSSIP_CANCEL_DEBUG ((__u64)1 << 29) +#define GOSSIP_MSGPAIR_DEBUG ((__u64)1 << 
30) +#define GOSSIP_CLIENTCORE_DEBUG ((__u64)1 << 31) +#define GOSSIP_CLIENTCORE_TIMING_DEBUG ((__u64)1 << 32) +#define GOSSIP_SETATTR_DEBUG ((__u64)1 << 33) +#define GOSSIP_MKDIR_DEBUG ((__u64)1 << 34) +#define GOSSIP_VARSTRIP_DEBUG ((__u64)1 << 35) +#define GOSSIP_GETEATTR_DEBUG ((__u64)1 << 36) +#define GOSSIP_SETEATTR_DEBUG ((__u64)1 << 37) +#define GOSSIP_ENDECODE_DEBUG ((__u64)1 << 38) +#define GOSSIP_DELEATTR_DEBUG ((__u64)1 << 39) +#define GOSSIP_ACCESS_DEBUG ((__u64)1 << 40) +#define GOSSIP_ACCESS_DETAIL_DEBUG ((__u64)1 << 41) +#define GOSSIP_LISTEATTR_DEBUG ((__u64)1 << 42) +#define GOSSIP_PERFCOUNTER_DEBUG ((__u64)1 << 43) +#define GOSSIP_STATE_MACHINE_DEBUG ((__u64)1 << 44) +#define GOSSIP_DBPF_KEYVAL_DEBUG ((__u64)1 << 45) +#define GOSSIP_LISTATTR_DEBUG ((__u64)1 << 46) +#define GOSSIP_DBPF_COALESCE_DEBUG ((__u64)1 << 47) +#define GOSSIP_ACCESS_HOSTNAMES ((__u64)1 << 48) +#define GOSSIP_FSCK_DEBUG ((__u64)1 << 49) +#define GOSSIP_BMI_DEBUG_MX ((__u64)1 << 50) +#define GOSSIP_BSTREAM_DEBUG ((__u64)1 << 51) +#define GOSSIP_BMI_DEBUG_PORTALS ((__u64)1 << 52) +#define GOSSIP_USER_DEV_DEBUG ((__u64)1 << 53) +#define GOSSIP_DIRECTIO_DEBUG ((__u64)1 << 54) +#define GOSSIP_MGMT_DEBUG ((__u64)1 << 55) +#define GOSSIP_MIRROR_DEBUG ((__u64)1 << 56) +#define GOSSIP_WIN_CLIENT_DEBUG ((__u64)1 << 57) +#define GOSSIP_SECURITY_DEBUG ((__u64)1 << 58) +#define GOSSIP_USRINT_DEBUG ((__u64)1 << 59) +#define GOSSIP_RCACHE_DEBUG ((__u64)1 << 60) +#define GOSSIP_SECCACHE_DEBUG ((__u64)1 << 61) + +#define GOSSIP_BMI_DEBUG_ALL ((__u64) (GOSSIP_BMI_DEBUG_TCP + \ + GOSSIP_BMI_DEBUG_CONTROL + \ + GOSSIP_BMI_DEBUG_GM + \ + GOSSIP_BMI_DEBUG_OFFSETS + \ + GOSSIP_BMI_DEBUG_IB + \ + GOSSIP_BMI_DEBUG_MX + \ + GOSSIP_BMI_DEBUG_PORTALS)) + +const char *PVFS_debug_get_next_debug_keyword(int position); + +#define GOSSIP_SUPER_DEBUG ((__u64)1 << 0) +#define GOSSIP_INODE_DEBUG ((__u64)1 << 1) +#define GOSSIP_FILE_DEBUG ((__u64)1 << 2) +#define GOSSIP_DIR_DEBUG ((__u64)1 << 3) +#define 
GOSSIP_UTILS_DEBUG ((__u64)1 << 4) +#define GOSSIP_WAIT_DEBUG ((__u64)1 << 5) +#define GOSSIP_ACL_DEBUG ((__u64)1 << 6) +#define GOSSIP_DCACHE_DEBUG ((__u64)1 << 7) +#define GOSSIP_DEV_DEBUG ((__u64)1 << 8) +#define GOSSIP_NAME_DEBUG ((__u64)1 << 9) +#define GOSSIP_BUFMAP_DEBUG ((__u64)1 << 10) +#define GOSSIP_CACHE_DEBUG ((__u64)1 << 11) +#define GOSSIP_DEBUGFS_DEBUG ((__u64)1 << 12) +#define GOSSIP_XATTR_DEBUG ((__u64)1 << 13) +#define GOSSIP_INIT_DEBUG ((__u64)1 << 14) +#define GOSSIP_SYSFS_DEBUG ((__u64)1 << 15) + +#define GOSSIP_MAX_NR 16 +#define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1) + +/*function prototypes*/ +__u64 PVFS_kmod_eventlog_to_mask(const char *event_logging); +__u64 PVFS_debug_eventlog_to_mask(const char *event_logging); +char *PVFS_debug_mask_to_eventlog(__u64 mask); +char *PVFS_kmod_mask_to_eventlog(__u64 mask); + +/* a private internal type */ +struct __keyword_mask_s { + const char *keyword; + __u64 mask_val; +}; + +#define __DEBUG_ALL ((__u64) -1) + +/* map all config keywords to pvfs2 debug masks here */ +static struct __keyword_mask_s s_keyword_mask_map[] = { + /* Log trove debugging info. Same as 'trove'. */ + {"storage", GOSSIP_TROVE_DEBUG}, + /* Log trove debugging info. Same as 'storage'. */ + {"trove", GOSSIP_TROVE_DEBUG}, + /* Log trove operations. */ + {"trove_op", GOSSIP_TROVE_OP_DEBUG}, + /* Log network debug info. */ + {"network", GOSSIP_BMI_DEBUG_ALL}, + /* Log server info, including new operations. */ + {"server", GOSSIP_SERVER_DEBUG}, + /* Log client sysint info. This is only useful for the client. */ + {"client", GOSSIP_CLIENT_DEBUG}, + /* Debug the varstrip distribution */ + {"varstrip", GOSSIP_VARSTRIP_DEBUG}, + /* Log job info */ + {"job", GOSSIP_JOB_DEBUG}, + /* Debug PINT_process_request calls. EXTREMELY verbose! 
*/ + {"request", GOSSIP_REQUEST_DEBUG}, + /* Log request scheduler events */ + {"reqsched", GOSSIP_REQ_SCHED_DEBUG}, + /* Log the flow protocol events, including flowproto_multiqueue */ + {"flowproto", GOSSIP_FLOW_PROTO_DEBUG}, + /* Log flow calls */ + {"flow", GOSSIP_FLOW_DEBUG}, + /* Debug the client name cache. Only useful on the client. */ + {"ncache", GOSSIP_NCACHE_DEBUG}, + /* Debug read-ahead cache events. Only useful on the client. */ + {"mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG}, + /* Debug the attribute cache. Only useful on the client. */ + {"acache", GOSSIP_ACACHE_DEBUG}, + /* Log/Debug distribution calls */ + {"distribution", GOSSIP_DIST_DEBUG}, + /* Debug the server-side dbpf attribute cache */ + {"dbpfattrcache", GOSSIP_DBPF_ATTRCACHE_DEBUG}, + /* Debug the client lookup state machine. */ + {"lookup", GOSSIP_LOOKUP_DEBUG}, + /* Debug the client remove state machine. */ + {"remove", GOSSIP_REMOVE_DEBUG}, + /* Debug the server getattr state machine. */ + {"getattr", GOSSIP_GETATTR_DEBUG}, + /* Debug the server setattr state machine. */ + {"setattr", GOSSIP_SETATTR_DEBUG}, + /* vectored getattr server state machine */ + {"listattr", GOSSIP_LISTATTR_DEBUG}, + /* Debug the client and server get ext attributes SM. */ + {"geteattr", GOSSIP_GETEATTR_DEBUG}, + /* Debug the client and server set ext attributes SM. 
*/ + {"seteattr", GOSSIP_SETEATTR_DEBUG}, + /* Debug the readdir operation (client and server) */ + {"readdir", GOSSIP_READDIR_DEBUG}, + /* Debug the mkdir operation (server only) */ + {"mkdir", GOSSIP_MKDIR_DEBUG}, + /* Debug the io operation (reads and writes) + * for both the client and server */ + {"io", GOSSIP_IO_DEBUG}, + /* Debug the server's open file descriptor cache */ + {"open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG}, + /* Debug permissions checking on the server */ + {"permissions", GOSSIP_PERMISSIONS_DEBUG}, + /* Debug the cancel operation */ + {"cancel", GOSSIP_CANCEL_DEBUG}, + /* Debug the msgpair state machine */ + {"msgpair", GOSSIP_MSGPAIR_DEBUG}, + /* Debug the client core app */ + {"clientcore", GOSSIP_CLIENTCORE_DEBUG}, + /* Debug the client timing state machines (job timeout, etc.) */ + {"clientcore_timing", GOSSIP_CLIENTCORE_TIMING_DEBUG}, + /* network encoding */ + {"endecode", GOSSIP_ENDECODE_DEBUG}, + /* Show server file (metadata) accesses (both modify and read-only). 
*/ + {"access", GOSSIP_ACCESS_DEBUG}, + /* Show more detailed server file accesses */ + {"access_detail", GOSSIP_ACCESS_DETAIL_DEBUG}, + /* Debug the listeattr operation */ + {"listeattr", GOSSIP_LISTEATTR_DEBUG}, + /* Debug the state machine management code */ + {"sm", GOSSIP_STATE_MACHINE_DEBUG}, + /* Debug the metadata dbpf keyval functions */ + {"keyval", GOSSIP_DBPF_KEYVAL_DEBUG}, + /* Debug the metadata sync coalescing code */ + {"coalesce", GOSSIP_DBPF_COALESCE_DEBUG}, + /* Display the hostnames instead of IP addrs in debug output */ + {"access_hostnames", GOSSIP_ACCESS_HOSTNAMES}, + /* Show the client device events */ + {"user_dev", GOSSIP_USER_DEV_DEBUG}, + /* Debug the fsck tool */ + {"fsck", GOSSIP_FSCK_DEBUG}, + /* Debug the bstream code */ + {"bstream", GOSSIP_BSTREAM_DEBUG}, + /* Debug trove in direct io mode */ + {"directio", GOSSIP_DIRECTIO_DEBUG}, + /* Debug direct io thread management */ + {"mgmt", GOSSIP_MGMT_DEBUG}, + /* Debug mirroring process */ + {"mirror", GOSSIP_MIRROR_DEBUG}, + /* Windows client */ + {"win_client", GOSSIP_WIN_CLIENT_DEBUG}, + /* Debug robust security code */ + {"security", GOSSIP_SECURITY_DEBUG}, + /* Capability Cache */ + {"seccache", GOSSIP_SECCACHE_DEBUG}, + /* Client User Interface */ + {"usrint", GOSSIP_USRINT_DEBUG}, + /* rcache */ + {"rcache", GOSSIP_RCACHE_DEBUG}, + /* Everything except the periodic events. Useful for debugging */ + {"verbose", + (__DEBUG_ALL & + ~(GOSSIP_PERFCOUNTER_DEBUG | GOSSIP_STATE_MACHINE_DEBUG | + GOSSIP_ENDECODE_DEBUG | GOSSIP_USER_DEV_DEBUG)) + }, + /* No debug output */ + {"none", GOSSIP_NO_DEBUG}, + /* Everything */ + {"all", __DEBUG_ALL} +}; + +#undef __DEBUG_ALL + +/* + * Map all kmod keywords to kmod debug masks here. Keep this + * structure "packed": + * + * "all" is always last... + * + * keyword mask_val index + * foo 1 0 + * bar 2 1 + * baz 4 2 + * qux 8 3 + * . . . 
+ */ +static struct __keyword_mask_s s_kmod_keyword_mask_map[] = { + {"super", GOSSIP_SUPER_DEBUG}, + {"inode", GOSSIP_INODE_DEBUG}, + {"file", GOSSIP_FILE_DEBUG}, + {"dir", GOSSIP_DIR_DEBUG}, + {"utils", GOSSIP_UTILS_DEBUG}, + {"wait", GOSSIP_WAIT_DEBUG}, + {"acl", GOSSIP_ACL_DEBUG}, + {"dcache", GOSSIP_DCACHE_DEBUG}, + {"dev", GOSSIP_DEV_DEBUG}, + {"name", GOSSIP_NAME_DEBUG}, + {"bufmap", GOSSIP_BUFMAP_DEBUG}, + {"cache", GOSSIP_CACHE_DEBUG}, + {"debugfs", GOSSIP_DEBUGFS_DEBUG}, + {"xattr", GOSSIP_XATTR_DEBUG}, + {"init", GOSSIP_INIT_DEBUG}, + {"sysfs", GOSSIP_SYSFS_DEBUG}, + {"none", GOSSIP_NO_DEBUG}, + {"all", GOSSIP_MAX_DEBUG} +}; + +static const int num_kmod_keyword_mask_map = (int) + (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + +static const int num_keyword_mask_map = (int) + (sizeof(s_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + +#endif /* __PVFS2_DEBUG_H */ diff --git a/fs/orangefs/pvfs2-debugfs.h b/fs/orangefs/pvfs2-debugfs.h new file mode 100644 index 000000000000..a66b7d08c14d --- /dev/null +++ b/fs/orangefs/pvfs2-debugfs.h @@ -0,0 +1,3 @@ +int pvfs2_debugfs_init(void); +int pvfs2_kernel_debug_init(void); +void pvfs2_debugfs_cleanup(void); diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/pvfs2-dev-proto.h new file mode 100644 index 000000000000..9c82e6e651f3 --- /dev/null +++ b/fs/orangefs/pvfs2-dev-proto.h @@ -0,0 +1,102 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#ifndef _PVFS2_DEV_PROTO_H +#define _PVFS2_DEV_PROTO_H + +/* + * types and constants shared between user space and kernel space for + * device interaction using a common protocol + */ + +/* + * valid pvfs2 kernel operation types + */ +#define PVFS2_VFS_OP_INVALID 0xFF000000 +#define PVFS2_VFS_OP_FILE_IO 0xFF000001 +#define PVFS2_VFS_OP_LOOKUP 0xFF000002 +#define PVFS2_VFS_OP_CREATE 0xFF000003 +#define PVFS2_VFS_OP_GETATTR 0xFF000004 +#define PVFS2_VFS_OP_REMOVE 0xFF000005 +#define PVFS2_VFS_OP_MKDIR 0xFF000006 +#define PVFS2_VFS_OP_READDIR 0xFF000007 +#define PVFS2_VFS_OP_SETATTR 0xFF000008 +#define PVFS2_VFS_OP_SYMLINK 0xFF000009 +#define PVFS2_VFS_OP_RENAME 0xFF00000A +#define PVFS2_VFS_OP_STATFS 0xFF00000B +#define PVFS2_VFS_OP_TRUNCATE 0xFF00000C +#define PVFS2_VFS_OP_MMAP_RA_FLUSH 0xFF00000D +#define PVFS2_VFS_OP_FS_MOUNT 0xFF00000E +#define PVFS2_VFS_OP_FS_UMOUNT 0xFF00000F +#define PVFS2_VFS_OP_GETXATTR 0xFF000010 +#define PVFS2_VFS_OP_SETXATTR 0xFF000011 +#define PVFS2_VFS_OP_LISTXATTR 0xFF000012 +#define PVFS2_VFS_OP_REMOVEXATTR 0xFF000013 +#define PVFS2_VFS_OP_PARAM 0xFF000014 +#define PVFS2_VFS_OP_PERF_COUNT 0xFF000015 +#define PVFS2_VFS_OP_CANCEL 0xFF00EE00 +#define PVFS2_VFS_OP_FSYNC 0xFF00EE01 +#define PVFS2_VFS_OP_FSKEY 0xFF00EE02 +#define PVFS2_VFS_OP_READDIRPLUS 0xFF00EE03 +#define PVFS2_VFS_OP_FILE_IOX 0xFF00EE04 + +/* + * Misc constants. Please retain them as multiples of 8! + * Otherwise 32-64 bit interactions will be messed up :) + */ +#define PVFS2_NAME_LEN 0x00000100 +#define PVFS2_MAX_DEBUG_STRING_LEN 0x00000400 +#define PVFS2_MAX_DEBUG_ARRAY_LEN 0x00000800 + +/* + * MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR. + * The value of PVFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 + * to accommodate an attribute object with mirrored handles. + * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and + * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr + * but readdirplus might. 
+*/ +#define MAX_DIRENT_COUNT_READDIR 0x00000060 +#define MAX_DIRENT_COUNT_READDIRPLUS 0x0000003C + +#include "upcall.h" +#include "downcall.h" + +/* + * These macros differ from proto macros in that they don't do any + * byte-swappings and are used to ensure that kernel-clientcore interactions + * don't cause any unaligned accesses etc on 64 bit machines + */ +#ifndef roundup4 +#define roundup4(x) (((x)+3) & ~3) +#endif + +#ifndef roundup8 +#define roundup8(x) (((x)+7) & ~7) +#endif + +/* strings: 4-byte length, bytes, NUL, padded to 8; decoding points into existing data */ +#define enc_string(pptr, pbuf) do { \ + __u32 __len = strlen(*(pbuf)); \ + *(__u32 *) *(pptr) = (__len); \ + memcpy(*(pptr)+4, *(pbuf), __len+1); \ + *(pptr) += roundup8(4 + __len + 1); \ +} while (0) + +#define dec_string(pptr, pbuf, plen) do { \ + __u32 __len = (*(__u32 *) *(pptr)); \ + *(pbuf) = *(pptr) + 4; \ + *(pptr) += roundup8(4 + __len + 1); \ + if (plen) \ + *(plen) = __len;\ +} while (0) + +struct read_write_x { + __s64 off; + __s64 len; +}; + +#endif diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h new file mode 100644 index 000000000000..6c787c4797d0 --- /dev/null +++ b/fs/orangefs/pvfs2-kernel.h @@ -0,0 +1,864 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * The PVFS2 Linux kernel support allows PVFS2 volumes to be mounted and + * accessed through the Linux VFS (i.e. using standard I/O system calls). + * This support is only needed on clients that wish to mount the file system. + * + */ + +/* + * Declarations and macros for the PVFS2 Linux kernel support. 
+ */ + +#ifndef __PVFS2KERNEL_H +#define __PVFS2KERNEL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pvfs2-dev-proto.h" + +#ifdef PVFS2_KERNEL_DEBUG +#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 10 +#else +#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 20 +#endif + +#define PVFS2_BUFMAP_WAIT_TIMEOUT_SECS 30 + +#define PVFS2_DEFAULT_SLOT_TIMEOUT_SECS 900 /* 15 minutes */ + +#define PVFS2_REQDEVICE_NAME "pvfs2-req" + +#define PVFS2_DEVREQ_MAGIC 0x20030529 +#define PVFS2_LINK_MAX 0x000000FF +#define PVFS2_PURGE_RETRY_COUNT 0x00000005 +#define PVFS2_SEEK_END 0x00000002 +#define PVFS2_MAX_NUM_OPTIONS 0x00000004 +#define PVFS2_MAX_MOUNT_OPT_LEN 0x00000080 +#define PVFS2_MAX_FSKEY_LEN 64 + +#define MAX_DEV_REQ_UPSIZE (2*sizeof(__s32) + \ +sizeof(__u64) + sizeof(struct pvfs2_upcall_s)) +#define MAX_DEV_REQ_DOWNSIZE (2*sizeof(__s32) + \ +sizeof(__u64) + sizeof(struct pvfs2_downcall_s)) + +#define BITS_PER_LONG_DIV_8 (BITS_PER_LONG >> 3) + +/* borrowed from irda.h */ +#ifndef MSECS_TO_JIFFIES +#define MSECS_TO_JIFFIES(ms) (((ms)*HZ+999)/1000) +#endif + +#define MAX_ALIGNED_DEV_REQ_UPSIZE \ + (MAX_DEV_REQ_UPSIZE + \ + ((((MAX_DEV_REQ_UPSIZE / \ + (BITS_PER_LONG_DIV_8)) * \ + (BITS_PER_LONG_DIV_8)) + \ + (BITS_PER_LONG_DIV_8)) - \ + MAX_DEV_REQ_UPSIZE)) + +#define MAX_ALIGNED_DEV_REQ_DOWNSIZE \ + (MAX_DEV_REQ_DOWNSIZE + \ + ((((MAX_DEV_REQ_DOWNSIZE / \ + (BITS_PER_LONG_DIV_8)) * \ + (BITS_PER_LONG_DIV_8)) + \ + (BITS_PER_LONG_DIV_8)) - \ + MAX_DEV_REQ_DOWNSIZE)) + +/* + * valid pvfs2 kernel operation states + * + * unknown - op was just initialized + * waiting - op is on request_list (upward bound) + * inprogr - op is in progress (waiting for downcall) + * serviced - op has matching downcall; ok + * 
purged - op has to start a timer since client-core + * exited uncleanly before servicing op + */ +enum pvfs2_vfs_op_states { + OP_VFS_STATE_UNKNOWN = 0, + OP_VFS_STATE_WAITING = 1, + OP_VFS_STATE_INPROGR = 2, + OP_VFS_STATE_SERVICED = 4, + OP_VFS_STATE_PURGED = 8, +}; + +#define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) +#define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) +#define set_op_state_serviced(op) ((op)->op_state = OP_VFS_STATE_SERVICED) +#define set_op_state_purged(op) ((op)->op_state |= OP_VFS_STATE_PURGED) + +#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) +#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) +#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) +#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) + +#define get_op(op) \ + do { \ + atomic_inc(&(op)->aio_ref_count); \ + gossip_debug(GOSSIP_DEV_DEBUG, \ + "(get) Alloced OP (%p:%llu)\n", \ + op, \ + llu((op)->tag)); \ + } while (0) + +#define put_op(op) \ + do { \ + if (atomic_sub_and_test(1, &(op)->aio_ref_count) == 1) { \ + gossip_debug(GOSSIP_DEV_DEBUG, \ + "(put) Releasing OP (%p:%llu)\n", \ + op, \ + llu((op)->tag)); \ + op_release(op); \ + } \ + } while (0) + +#define op_wait(op) (atomic_read(&(op)->aio_ref_count) <= 2 ? 0 : 1) + +/* + * Defines for controlling whether I/O upcalls are for async or sync operations + */ +enum PVFS_async_io_type { + PVFS_VFS_SYNC_IO = 0, + PVFS_VFS_ASYNC_IO = 1, +}; + +/* + * An array of client_debug_mask will be built to hold debug keyword/mask + * values fetched from userspace. 
+ */ +struct client_debug_mask { + char *keyword; + __u64 mask1; + __u64 mask2; +}; + +/* + * pvfs2 kernel memory related flags + */ + +#if ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) +#define PVFS2_CACHE_CREATE_FLAGS SLAB_RED_ZONE +#else +#define PVFS2_CACHE_CREATE_FLAGS 0 +#endif /* ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ + +#define PVFS2_CACHE_ALLOC_FLAGS (GFP_KERNEL) +#define PVFS2_GFP_FLAGS (GFP_KERNEL) +#define PVFS2_BUFMAP_GFP_FLAGS (GFP_KERNEL) + +#define pvfs2_kmap(page) kmap(page) +#define pvfs2_kunmap(page) kunmap(page) + +/* pvfs2 xattr and acl related defines */ +#define PVFS2_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define PVFS2_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define PVFS2_XATTR_INDEX_TRUSTED 3 +#define PVFS2_XATTR_INDEX_DEFAULT 4 + +#if 0 +#ifndef POSIX_ACL_XATTR_ACCESS +#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access" +#endif +#ifndef POSIX_ACL_XATTR_DEFAULT +#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default" +#endif +#endif + +#define PVFS2_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS +#define PVFS2_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT +#define PVFS2_XATTR_NAME_TRUSTED_PREFIX "trusted." 
+#define PVFS2_XATTR_NAME_DEFAULT_PREFIX "" + +/* these functions are defined in pvfs2-utils.c */ +int orangefs_prepare_cdm_array(char *debug_array_string); +int orangefs_prepare_debugfs_help_string(int); + +/* defined in pvfs2-debugfs.c */ +int pvfs2_client_debug_init(void); + +void debug_string_to_mask(char *, void *, int); +void do_c_mask(int, char *, struct client_debug_mask **); +void do_k_mask(int, char *, __u64 **); + +void debug_mask_to_string(void *, int); +void do_k_string(void *, int); +void do_c_string(void *, int); +int check_amalgam_keyword(void *, int); +int keyword_is_amalgam(char *); + +/*these variables are defined in pvfs2-mod.c */ +extern char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; +/* HELLO +extern struct client_debug_mask current_client_mask; +*/ +extern unsigned int kernel_mask_set_mod_init; + +extern int pvfs2_init_acl(struct inode *inode, struct inode *dir); +extern const struct xattr_handler *pvfs2_xattr_handlers[]; + +extern struct posix_acl *pvfs2_get_acl(struct inode *inode, int type); +extern int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); + +int pvfs2_xattr_set_default(struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags, + int handler_flags); + +int pvfs2_xattr_get_default(struct dentry *dentry, + const char *name, + void *buffer, + size_t size, + int handler_flags); + +/* + * Redefine xtvec structure so that we could move helper functions out of + * the define + */ +struct xtvec { + __kernel_off_t xtv_off; /* must be off_t */ + __kernel_size_t xtv_len; /* must be size_t */ +}; + +/* + * pvfs2 data structures + */ +struct pvfs2_kernel_op_s { + enum pvfs2_vfs_op_states op_state; + __u64 tag; + + /* + * Set uses_shared_memory to 1 if this operation uses shared memory. 
+ * If true, then a retry on the op must also get a new shared memory + * buffer and re-populate it. + */ + int uses_shared_memory; + + struct pvfs2_upcall_s upcall; + struct pvfs2_downcall_s downcall; + + wait_queue_head_t waitq; + spinlock_t lock; + + int io_completed; + wait_queue_head_t io_completion_waitq; + + /* + * upcalls requiring variable length trailers require that this struct + * be in the request list even after client-core does a read() on the + * device to dequeue the upcall. + * if op_linger field goes to 0, we dequeue this op off the list. + * else we let it stay. What gets passed to the read() is + * a) if op_linger field is = 1, pvfs2_kernel_op_s itself + * b) else if = 0, we pass ->upcall.trailer_buf + * We expect to have only a single upcall trailer buffer, + * so we expect callers with trailers + * to set this field to 2 and others to set it to 1. + */ + __s32 op_linger, op_linger_tmp; + /* VFS aio fields */ + + /* used by the async I/O code to stash the pvfs2_kiocb_s structure */ + void *priv; + + /* used again for the async I/O code for deallocation */ + atomic_t aio_ref_count; + + int attempts; + + struct list_head list; +}; + +/* per inode private pvfs2 info */ +struct pvfs2_inode_s { + struct pvfs2_object_kref refn; + char link_target[PVFS_NAME_MAX]; + __s64 blksize; + /* + * Reading/Writing Extended attributes need to acquire the appropriate + * reader/writer semaphore on the pvfs2_inode_s structure. 
+ */ + struct rw_semaphore xattr_sem; + + struct inode vfs_inode; + sector_t last_failed_block_index_read; + + /* + * State of in-memory attributes not yet flushed to disk associated + * with this object + */ + unsigned long pinode_flags; + + /* All allocated pvfs2_inode_s objects are chained to a list */ + struct list_head list; +}; + +#define P_ATIME_FLAG 0 +#define P_MTIME_FLAG 1 +#define P_CTIME_FLAG 2 +#define P_MODE_FLAG 3 + +#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) +#define SetAtimeFlag(pinode) set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) +#define AtimeFlag(pinode) test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) + +#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) +#define SetMtimeFlag(pinode) set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) +#define MtimeFlag(pinode) test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) + +#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) +#define SetCtimeFlag(pinode) set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) +#define CtimeFlag(pinode) test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) + +#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags) +#define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags) +#define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags) + +/* per superblock private pvfs2 info */ +struct pvfs2_sb_info_s { + struct pvfs2_khandle root_khandle; + __s32 fs_id; + int id; + int flags; +#define PVFS2_OPT_INTR 0x01 +#define PVFS2_OPT_LOCAL_LOCK 0x02 + char devname[PVFS_MAX_SERVER_ADDR_LEN]; + struct super_block *sb; + int mount_pending; + struct list_head list; +}; + +/* + * a temporary structure used only for sb mount time that groups the + * mount time data provided along with a private superblock structure + * that is allocated before a 'kernel' superblock is allocated. 
+*/ +struct pvfs2_mount_sb_info_s { + void *data; + struct pvfs2_khandle root_khandle; + __s32 fs_id; + int id; +}; + +/* + * structure that holds the state of any async I/O operation issued + * through the VFS. Needed especially to handle cancellation requests + * or even completion notification so that the VFS client-side daemon + * can free up its vfs_request slots. + */ +struct pvfs2_kiocb_s { + /* the pointer to the task that initiated the AIO */ + struct task_struct *tsk; + + /* pointer to the kiocb that kicked this operation */ + struct kiocb *kiocb; + + /* buffer index that was used for the I/O */ + struct pvfs2_bufmap *bufmap; + int buffer_index; + + /* pvfs2 kernel operation type */ + struct pvfs2_kernel_op_s *op; + + /* The user space buffers from/to which I/O is being staged */ + struct iovec *iov; + + /* number of elements in the iovector */ + unsigned long nr_segs; + + /* set to indicate the type of the operation */ + int rw; + + /* file offset */ + loff_t offset; + + /* and the count in bytes */ + size_t bytes_to_be_copied; + + ssize_t bytes_copied; + int needs_cleanup; +}; + +struct pvfs2_stats { + unsigned long cache_hits; + unsigned long cache_misses; + unsigned long reads; + unsigned long writes; +}; + +extern struct pvfs2_stats g_pvfs2_stats; + +/* + NOTE: See Documentation/filesystems/porting for information + on implementing FOO_I and properly accessing fs private data +*/ +static inline struct pvfs2_inode_s *PVFS2_I(struct inode *inode) +{ + return container_of(inode, struct pvfs2_inode_s, vfs_inode); +} + +static inline struct pvfs2_sb_info_s *PVFS2_SB(struct super_block *sb) +{ + return (struct pvfs2_sb_info_s *) sb->s_fs_info; +} + +/* ino_t descends from "unsigned long", 8 bytes, 64 bits. 
*/ +static inline ino_t pvfs2_khandle_to_ino(struct pvfs2_khandle *khandle) +{ + union { + unsigned char u[8]; + __u64 ino; + } ihandle; + + ihandle.u[0] = khandle->u[0] ^ khandle->u[4]; + ihandle.u[1] = khandle->u[1] ^ khandle->u[5]; + ihandle.u[2] = khandle->u[2] ^ khandle->u[6]; + ihandle.u[3] = khandle->u[3] ^ khandle->u[7]; + ihandle.u[4] = khandle->u[12] ^ khandle->u[8]; + ihandle.u[5] = khandle->u[13] ^ khandle->u[9]; + ihandle.u[6] = khandle->u[14] ^ khandle->u[10]; + ihandle.u[7] = khandle->u[15] ^ khandle->u[11]; + + return ihandle.ino; +} + +static inline struct pvfs2_khandle *get_khandle_from_ino(struct inode *inode) +{ + return &(PVFS2_I(inode)->refn.khandle); +} + +static inline __s32 get_fsid_from_ino(struct inode *inode) +{ + return PVFS2_I(inode)->refn.fs_id; +} + +static inline ino_t get_ino_from_khandle(struct inode *inode) +{ + struct pvfs2_khandle *khandle; + ino_t ino; + + khandle = get_khandle_from_ino(inode); + ino = pvfs2_khandle_to_ino(khandle); + return ino; +} + +static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry) +{ + return get_ino_from_khandle(dentry->d_parent->d_inode); +} + +static inline int is_root_handle(struct inode *inode) +{ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: root handle: %pU, this handle: %pU:\n", + __func__, + &PVFS2_SB(inode->i_sb)->root_khandle, + get_khandle_from_ino(inode)); + + if (PVFS_khandle_cmp(&(PVFS2_SB(inode->i_sb)->root_khandle), + get_khandle_from_ino(inode))) + return 0; + else + return 1; +} + +static inline int match_handle(struct pvfs2_khandle resp_handle, + struct inode *inode) +{ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: one handle: %pU, another handle:%pU:\n", + __func__, + &resp_handle, + get_khandle_from_ino(inode)); + + if (PVFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode))) + return 0; + else + return 1; +} + +/* + * defined in pvfs2-cache.c + */ +int op_cache_initialize(void); +int op_cache_finalize(void); +struct pvfs2_kernel_op_s *op_alloc(__s32 type); +struct 
pvfs2_kernel_op_s *op_alloc_trailer(__s32 type); +char *get_opname_string(struct pvfs2_kernel_op_s *new_op); +void op_release(struct pvfs2_kernel_op_s *op); + +int dev_req_cache_initialize(void); +int dev_req_cache_finalize(void); +void *dev_req_alloc(void); +void dev_req_release(void *); + +int pvfs2_inode_cache_initialize(void); +int pvfs2_inode_cache_finalize(void); + +int kiocb_cache_initialize(void); +int kiocb_cache_finalize(void); +struct pvfs2_kiocb_s *kiocb_alloc(void); +void kiocb_release(struct pvfs2_kiocb_s *ptr); + +/* + * defined in pvfs2-mod.c + */ +void purge_inprogress_ops(void); + +/* + * defined in waitqueue.c + */ +int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op); +int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op); +void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op); +void purge_waiting_ops(void); + +/* + * defined in super.c + */ +struct dentry *pvfs2_mount(struct file_system_type *fst, + int flags, + const char *devname, + void *data); + +void pvfs2_kill_sb(struct super_block *sb); +int pvfs2_remount(struct super_block *sb); + +int fsid_key_table_initialize(void); +void fsid_key_table_finalize(void); + +/* + * defined in inode.c + */ +__u32 convert_to_pvfs2_mask(unsigned long lite_mask); +struct inode *pvfs2_new_inode(struct super_block *sb, + struct inode *dir, + int mode, + dev_t dev, + struct pvfs2_object_kref *ref); + +int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr); + +int pvfs2_getattr(struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *kstat); + +/* + * defined in xattr.c + */ +int pvfs2_setxattr(struct dentry *dentry, + const char *name, + const void *value, + size_t size, + int flags); + +ssize_t pvfs2_getxattr(struct dentry *dentry, + const char *name, + void *buffer, + size_t size); + +ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); + +/* + * defined in namei.c + */ +struct inode *pvfs2_iget(struct super_block *sb, + struct 
pvfs2_object_kref *ref); + +ssize_t pvfs2_inode_read(struct inode *inode, + char *buf, + size_t count, + loff_t *offset, + loff_t readahead_size); + +/* + * defined in devpvfs2-req.c + */ +int pvfs2_dev_init(void); +void pvfs2_dev_cleanup(void); +int is_daemon_in_service(void); +int fs_mount_pending(__s32 fsid); + +/* + * defined in pvfs2-utils.c + */ +__s32 fsid_of_op(struct pvfs2_kernel_op_s *op); + +int pvfs2_flush_inode(struct inode *inode); + +ssize_t pvfs2_inode_getxattr(struct inode *inode, + const char *prefix, + const char *name, + void *buffer, + size_t size); + +int pvfs2_inode_setxattr(struct inode *inode, + const char *prefix, + const char *name, + const void *value, + size_t size, + int flags); + +int pvfs2_inode_getattr(struct inode *inode, __u32 mask); + +int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr); + +void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op); + +void pvfs2_make_bad_inode(struct inode *inode); + +void mask_blocked_signals(sigset_t *orig_sigset); + +void unmask_blocked_signals(sigset_t *orig_sigset); + +int pvfs2_unmount_sb(struct super_block *sb); + +int pvfs2_cancel_op_in_progress(__u64 tag); + +__u64 pvfs2_convert_time_field(void *time_ptr); + +int pvfs2_normalize_to_errno(__s32 error_code); + +extern struct mutex devreq_mutex; +extern struct mutex request_mutex; +extern int debug; +extern int op_timeout_secs; +extern int slot_timeout_secs; +extern struct list_head pvfs2_superblocks; +extern spinlock_t pvfs2_superblocks_lock; +extern struct list_head pvfs2_request_list; +extern spinlock_t pvfs2_request_list_lock; +extern wait_queue_head_t pvfs2_request_list_waitq; +extern struct list_head *htable_ops_in_progress; +extern spinlock_t htable_ops_in_progress_lock; +extern int hash_table_size; + +extern const struct address_space_operations pvfs2_address_operations; +extern struct backing_dev_info pvfs2_backing_dev_info; +extern struct inode_operations pvfs2_file_inode_operations; +extern const struct file_operations 
pvfs2_file_operations; +extern struct inode_operations pvfs2_symlink_inode_operations; +extern struct inode_operations pvfs2_dir_inode_operations; +extern const struct file_operations pvfs2_dir_operations; +extern const struct dentry_operations pvfs2_dentry_operations; +extern const struct file_operations pvfs2_devreq_file_operations; + +extern wait_queue_head_t pvfs2_bufmap_init_waitq; + +/* + * misc convenience macros + */ +#define add_op_to_request_list(op) \ +do { \ + spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&op->lock); \ + set_op_state_waiting(op); \ + list_add_tail(&op->list, &pvfs2_request_list); \ + spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&op->lock); \ + wake_up_interruptible(&pvfs2_request_list_waitq); \ +} while (0) + +#define add_priority_op_to_request_list(op) \ + do { \ + spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&op->lock); \ + set_op_state_waiting(op); \ + \ + list_add(&op->list, &pvfs2_request_list); \ + spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&op->lock); \ + wake_up_interruptible(&pvfs2_request_list_waitq); \ +} while (0) + +#define remove_op_from_request_list(op) \ + do { \ + struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ + struct pvfs2_kernel_op_s *tmp_op = NULL; \ + \ + spin_lock(&pvfs2_request_list_lock); \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_request_list) { \ + tmp_op = list_entry(tmp, \ + struct pvfs2_kernel_op_s, \ + list); \ + if (tmp_op && (tmp_op == op)) { \ + list_del(&tmp_op->list); \ + break; \ + } \ + } \ + spin_unlock(&pvfs2_request_list_lock); \ + } while (0) + +#define PVFS2_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ +#define PVFS2_OP_PRIORITY 2 /* service_operation() is high priority */ +#define PVFS2_OP_CANCELLATION 4 /* this is a cancellation */ +#define PVFS2_OP_NO_SEMAPHORE 8 /* don't acquire semaphore */ +#define PVFS2_OP_ASYNC 16 /* Queue it, but don't wait */ + +int service_operation(struct pvfs2_kernel_op_s *op, + const char 
*op_name, + int flags); + +/* + * handles two possible error cases, depending on context. + * + * by design, our vfs i/o errors need to be handled in one of two ways, + * depending on where the error occurred. + * + * if the error happens in the waitqueue code because we either timed + * out or a signal was raised while waiting, we need to cancel the + * userspace i/o operation and free the op manually. this is done to + * avoid having the device start writing application data to our shared + * bufmap pages without us expecting it. + * + * FIXME: POSSIBLE OPTIMIZATION: + * However, if we timed out or if we got a signal AND our upcall was never + * picked off the queue (i.e. we were in OP_VFS_STATE_WAITING), then we don't + * need to send a cancellation upcall. The way we can handle this is + * set error_exit to 2 in such cases and 1 whenever cancellation has to be + * sent and have handle_error + * take care of this situation as well. + * + * if a pvfs2 sysint level error occurred and i/o has been completed, + * there is no need to cancel the operation, as the user has finished + * using the bufmap page and so there is no danger in this case. in + * this case, we wake up the device normally so that it may free the + * op, as normal. + * + * note the only reason this is a macro is that the read and write cases + * share it; it acts on the caller's new_op, bufmap and buffer_index. + */ +#define handle_io_error() \ +do { \ + if (!op_state_serviced(new_op)) { \ + pvfs2_cancel_op_in_progress(new_op->tag); \ + op_release(new_op); \ + } else { \ + wake_up_daemon_for_return(new_op); \ + } \ + new_op = NULL; \ + pvfs_bufmap_put(bufmap, buffer_index); \ + buffer_index = -1; \ +} while (0) + +#define get_interruptible_flag(inode) \ + ((PVFS2_SB(inode->i_sb)->flags & PVFS2_OPT_INTR) ? 
\ + PVFS2_OP_INTERRUPTIBLE : 0) + +#define add_pvfs2_sb(sb) \ +do { \ + gossip_debug(GOSSIP_SUPER_DEBUG, \ + "Adding SB %p to pvfs2 superblocks\n", \ + PVFS2_SB(sb)); \ + spin_lock(&pvfs2_superblocks_lock); \ + list_add_tail(&PVFS2_SB(sb)->list, &pvfs2_superblocks); \ + spin_unlock(&pvfs2_superblocks_lock); \ +} while (0) + +#define remove_pvfs2_sb(sb) \ +do { \ + struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ + struct pvfs2_sb_info_s *pvfs2_sb = NULL; \ + \ + spin_lock(&pvfs2_superblocks_lock); \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_superblocks) { \ + pvfs2_sb = list_entry(tmp, \ + struct pvfs2_sb_info_s, \ + list); \ + if (pvfs2_sb && (pvfs2_sb->sb == sb)) { \ + gossip_debug(GOSSIP_SUPER_DEBUG, \ + "Removing SB %p from pvfs2 superblocks\n", \ + pvfs2_sb); \ + list_del(&pvfs2_sb->list); \ + break; \ + } \ + } \ + spin_unlock(&pvfs2_superblocks_lock); \ +} while (0) + +#define pvfs2_lock_inode(inode) spin_lock(&inode->i_lock) +#define pvfs2_unlock_inode(inode) spin_unlock(&inode->i_lock) +#define pvfs2_current_signal_lock current->sighand->siglock +#define pvfs2_current_sigaction current->sighand->action + +#define fill_default_sys_attrs(sys_attr, type, mode) \ +do { \ + sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \ + sys_attr.group = from_kgid(current_user_ns(), current_fsgid()); \ + sys_attr.size = 0; \ + sys_attr.perms = PVFS_util_translate_mode(mode); \ + sys_attr.objtype = type; \ + sys_attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; \ +} while (0) + +#define pvfs2_inode_lock(__i) mutex_lock(&(__i)->i_mutex) + +#define pvfs2_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex) + +static inline void pvfs2_i_size_write(struct inode *inode, loff_t i_size) +{ +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + pvfs2_inode_lock(inode); +#endif + i_size_write(inode, i_size); +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + pvfs2_inode_unlock(inode); +#endif +} + +static inline unsigned int diff(struct timeval *end, struct timeval 
*begin) +{ + if (end->tv_usec < begin->tv_usec) { + end->tv_usec += 1000000; + end->tv_sec--; + } + end->tv_sec -= begin->tv_sec; + end->tv_usec -= begin->tv_usec; + return (end->tv_sec * 1000000) + end->tv_usec; +} + +#endif /* __PVFS2KERNEL_H */ diff --git a/fs/orangefs/pvfs2-sysfs.h b/fs/orangefs/pvfs2-sysfs.h new file mode 100644 index 000000000000..f0b76382db02 --- /dev/null +++ b/fs/orangefs/pvfs2-sysfs.h @@ -0,0 +1,2 @@ +extern int orangefs_sysfs_init(void); +extern void orangefs_sysfs_exit(void); diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h new file mode 100644 index 000000000000..1e07f626aac6 --- /dev/null +++ b/fs/orangefs/upcall.h @@ -0,0 +1,255 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef __UPCALL_H +#define __UPCALL_H + +/* + * Sanitized this header file to fix + * 32-64 bit interaction issues between + * client-core and device + */ +struct pvfs2_io_request_s { + __s32 async_vfs_io; + __s32 buf_index; + __s32 count; + __s32 __pad1; + __s64 offset; + struct pvfs2_object_kref refn; + enum PVFS_io_type io_type; + __s32 readahead_size; +}; + +struct pvfs2_iox_request_s { + __s32 buf_index; + __s32 count; + struct pvfs2_object_kref refn; + enum PVFS_io_type io_type; + __s32 __pad1; +}; + +struct pvfs2_lookup_request_s { + __s32 sym_follow; + __s32 __pad1; + struct pvfs2_object_kref parent_refn; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_create_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_symlink_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char entry_name[PVFS2_NAME_LEN]; + char target[PVFS2_NAME_LEN]; +}; + +struct pvfs2_getattr_request_s { + struct pvfs2_object_kref refn; + __u32 mask; + __u32 __pad1; +}; + +struct pvfs2_setattr_request_s { + struct pvfs2_object_kref refn; + struct PVFS_sys_attr_s attributes; +}; + 
+struct pvfs2_remove_request_s { + struct pvfs2_object_kref parent_refn; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_mkdir_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_readdir_request_s { + struct pvfs2_object_kref refn; + __u64 token; + __s32 max_dirent_count; + __s32 buf_index; +}; + +struct pvfs2_readdirplus_request_s { + struct pvfs2_object_kref refn; + __u64 token; + __s32 max_dirent_count; + __u32 mask; + __s32 buf_index; + __s32 __pad1; +}; + +struct pvfs2_rename_request_s { + struct pvfs2_object_kref old_parent_refn; + struct pvfs2_object_kref new_parent_refn; + char d_old_name[PVFS2_NAME_LEN]; + char d_new_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_statfs_request_s { + __s32 fs_id; + __s32 __pad1; +}; + +struct pvfs2_truncate_request_s { + struct pvfs2_object_kref refn; + __s64 size; +}; + +struct pvfs2_mmap_ra_cache_flush_request_s { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_fs_mount_request_s { + char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN]; +}; + +struct pvfs2_fs_umount_request_s { + __s32 id; + __s32 fs_id; + char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN]; +}; + +struct pvfs2_getxattr_request_s { + struct pvfs2_object_kref refn; + __s32 key_sz; + __s32 __pad1; + char key[PVFS_MAX_XATTR_NAMELEN]; +}; + +struct pvfs2_setxattr_request_s { + struct pvfs2_object_kref refn; + struct PVFS_keyval_pair keyval; + __s32 flags; + __s32 __pad1; +}; + +struct pvfs2_listxattr_request_s { + struct pvfs2_object_kref refn; + __s32 requested_count; + __s32 __pad1; + __u64 token; +}; + +struct pvfs2_removexattr_request_s { + struct pvfs2_object_kref refn; + __s32 key_sz; + __s32 __pad1; + char key[PVFS_MAX_XATTR_NAMELEN]; +}; + +struct pvfs2_op_cancel_s { + __u64 op_tag; +}; + +struct pvfs2_fsync_request_s { + struct pvfs2_object_kref refn; +}; + +enum pvfs2_param_request_type { + PVFS2_PARAM_REQUEST_SET = 1, + PVFS2_PARAM_REQUEST_GET = 2 +}; + +enum 
pvfs2_param_request_op { + PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1, + PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2, + PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3, + PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4, + PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5, + PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6, + PVFS2_PARAM_REQUEST_OP_PERF_RESET = 7, + PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8, + PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9, + PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10, + PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15, + PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG = 16, + PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17, + PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18, + PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19, + PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24, + PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25, +}; + +struct pvfs2_param_request_s { + enum pvfs2_param_request_type type; + enum pvfs2_param_request_op op; + __s64 value; + char s_value[PVFS2_MAX_DEBUG_STRING_LEN]; +}; + +enum pvfs2_perf_count_request_type { + PVFS2_PERF_COUNT_REQUEST_ACACHE = 1, + PVFS2_PERF_COUNT_REQUEST_NCACHE = 2, + PVFS2_PERF_COUNT_REQUEST_CAPCACHE = 3, +}; + +struct pvfs2_perf_count_request_s { + enum pvfs2_perf_count_request_type type; + __s32 __pad1; +}; + +struct pvfs2_fs_key_request_s { + __s32 fsid; + __s32 __pad1; +}; + +struct pvfs2_upcall_s { + __s32 type; + __u32 uid; + __u32 gid; + int pid; + int tgid; + /* currently trailer is used only by readx/writex (iox) */ + __s64 
trailer_size; + char *trailer_buf; + + union { + struct pvfs2_io_request_s io; + struct pvfs2_iox_request_s iox; + struct pvfs2_lookup_request_s lookup; + struct pvfs2_create_request_s create; + struct pvfs2_symlink_request_s sym; + struct pvfs2_getattr_request_s getattr; + struct pvfs2_setattr_request_s setattr; + struct pvfs2_remove_request_s remove; + struct pvfs2_mkdir_request_s mkdir; + struct pvfs2_readdir_request_s readdir; + struct pvfs2_readdirplus_request_s readdirplus; + struct pvfs2_rename_request_s rename; + struct pvfs2_statfs_request_s statfs; + struct pvfs2_truncate_request_s truncate; + struct pvfs2_mmap_ra_cache_flush_request_s ra_cache_flush; + struct pvfs2_fs_mount_request_s fs_mount; + struct pvfs2_fs_umount_request_s fs_umount; + struct pvfs2_getxattr_request_s getxattr; + struct pvfs2_setxattr_request_s setxattr; + struct pvfs2_listxattr_request_s listxattr; + struct pvfs2_removexattr_request_s removexattr; + struct pvfs2_op_cancel_s cancel; + struct pvfs2_fsync_request_s fsync; + struct pvfs2_param_request_s param; + struct pvfs2_perf_count_request_s perf_count; + struct pvfs2_fs_key_request_s fs_key; + } req; +}; + +#endif /* __UPCALL_H */ From 5db11c21a929cd9d8c0484006efb1014fc723c93 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 17 Jul 2015 10:38:12 -0400 Subject: [PATCH 002/174] Orangefs: kernel client part 2 Signed-off-by: Mike Marshall --- fs/orangefs/acl.c | 175 +++++++ fs/orangefs/dcache.c | 142 +++++ fs/orangefs/devpvfs2-req.c | 997 +++++++++++++++++++++++++++++++++++ fs/orangefs/dir.c | 394 ++++++++++++++ fs/orangefs/file.c | 1019 ++++++++++++++++++++++++++++++++++++ fs/orangefs/inode.c | 469 +++++++++++++++++ 6 files changed, 3196 insertions(+) create mode 100644 fs/orangefs/acl.c create mode 100644 fs/orangefs/dcache.c create mode 100644 fs/orangefs/devpvfs2-req.c create mode 100644 fs/orangefs/dir.c create mode 100644 fs/orangefs/file.c create mode 100644 fs/orangefs/inode.c diff --git a/fs/orangefs/acl.c 
b/fs/orangefs/acl.c new file mode 100644 index 000000000000..e462b81a3ba1 --- /dev/null +++ b/fs/orangefs/acl.c @@ -0,0 +1,175 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" +#include +#include + +struct posix_acl *pvfs2_get_acl(struct inode *inode, int type) +{ + struct posix_acl *acl; + int ret; + char *key = NULL, *value = NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + key = PVFS2_XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + key = PVFS2_XATTR_NAME_ACL_DEFAULT; + break; + default: + gossip_err("pvfs2_get_acl: bogus value of type %d\n", type); + return ERR_PTR(-EINVAL); + } + /* + * Rather than incurring a network call just to determine the exact + * length of the attribute, I just allocate a max length to save on + * the network call. Conceivably, we could pass NULL to + * pvfs2_inode_getxattr() to probe the length of the value, but + * I don't do that for now. 
+ */ + value = kmalloc(PVFS_MAX_XATTR_VALUELEN, GFP_KERNEL); + if (value == NULL) + return ERR_PTR(-ENOMEM); + + gossip_debug(GOSSIP_ACL_DEBUG, + "inode %pU, key %s, type %d\n", + get_khandle_from_ino(inode), + key, + type); + ret = pvfs2_inode_getxattr(inode, + "", + key, + value, + PVFS_MAX_XATTR_VALUELEN); + /* if the key exists, convert it to an in-memory rep */ + if (ret > 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, ret); + } else if (ret == -ENODATA || ret == -ENOSYS) { + acl = NULL; + } else { + gossip_err("inode %pU retrieving acl's failed with error %d\n", + get_khandle_from_ino(inode), + ret); + acl = ERR_PTR(ret); + } + /* kfree(NULL) is safe, so don't worry if value ever got used */ + kfree(value); + return acl; +} + +int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + int error = 0; + void *value = NULL; + size_t size = 0; + const char *name = NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + name = PVFS2_XATTR_NAME_ACL_ACCESS; + if (acl) { + umode_t mode = inode->i_mode; + /* + * can we represent this with the traditional file + * mode permission bits? 
+ */ + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) { + gossip_err("%s: posix_acl_equiv_mode err: %d\n", + __func__, + error); + return error; + } + + if (inode->i_mode != mode) + SetModeFlag(pvfs2_inode); + inode->i_mode = mode; + mark_inode_dirty_sync(inode); + if (error == 0) + acl = NULL; + } + break; + case ACL_TYPE_DEFAULT: + name = PVFS2_XATTR_NAME_ACL_DEFAULT; + break; + default: + gossip_err("%s: invalid type %d!\n", __func__, type); + return -EINVAL; + } + + gossip_debug(GOSSIP_ACL_DEBUG, + "%s: inode %pU, key %s type %d\n", + __func__, get_khandle_from_ino(inode), + name, + type); + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_KERNEL); + if (!value) + return -ENOMEM; + + error = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (error < 0) + goto out; + } + + gossip_debug(GOSSIP_ACL_DEBUG, + "%s: name %s, value %p, size %zd, acl %p\n", + __func__, name, value, size, acl); + /* + * Go ahead and set the extended attribute now. NOTE: Suppose acl + * was NULL, then value will be NULL and size will be 0 and that + * will xlate to a removexattr. However, we don't want removexattr + * complain if attributes does not exist. 
+ */ + error = pvfs2_inode_setxattr(inode, "", name, value, size, 0); + +out: + kfree(value); + if (!error) + set_cached_acl(inode, type, acl); + return error; +} + +int pvfs2_init_acl(struct inode *inode, struct inode *dir) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct posix_acl *default_acl, *acl; + umode_t mode = inode->i_mode; + int error = 0; + + ClearModeFlag(pvfs2_inode); + + error = posix_acl_create(dir, &mode, &default_acl, &acl); + if (error) + return error; + + if (default_acl) { + error = pvfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + posix_acl_release(default_acl); + } + + if (acl) { + if (!error) + error = pvfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + posix_acl_release(acl); + } + + /* If mode of the inode was changed, then do a forcible ->setattr */ + if (mode != inode->i_mode) { + SetModeFlag(pvfs2_inode); + inode->i_mode = mode; + pvfs2_flush_inode(inode); + } + + return error; +} diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c new file mode 100644 index 000000000000..9466b179bf24 --- /dev/null +++ b/fs/orangefs/dcache.c @@ -0,0 +1,142 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * Implementation of dentry (directory cache) functions. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" + +/* Returns 1 if dentry can still be trusted, else 0. 
*/
/*
 * Re-issues a lookup upcall for the dentry's name under its parent and
 * checks that the handle returned still matches the dentry's inode.
 *
 * A reference on the parent dentry is held for the duration of the call.
 * When the upcall fails, the downcall reports a non-zero status, or the
 * handle no longer matches, the dentry is dropped with d_drop() and 0 is
 * returned.  If the op cannot be allocated, 0 is returned without
 * dropping the dentry.
 */
static int pvfs2_revalidate_lookup(struct dentry *dentry)
{
	struct dentry *parent_dentry = dget_parent(dentry);
	struct inode *parent_inode = parent_dentry->d_inode;
	struct pvfs2_inode_s *parent = PVFS2_I(parent_inode);
	struct inode *inode = dentry->d_inode;
	struct pvfs2_kernel_op_s *new_op;
	int ret = 0;
	int err = 0;

	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);

	new_op = op_alloc(PVFS2_VFS_OP_LOOKUP);
	if (!new_op)
		goto out_put_parent;

	new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW;
	new_op->upcall.req.lookup.parent_refn = parent->refn;
	/*
	 * d_name is a PVFS2_NAME_LEN byte array; copy at most
	 * PVFS2_NAME_LEN - 1 bytes and terminate explicitly so the name
	 * handed to the daemon is always a NUL-terminated string.
	 */
	strncpy(new_op->upcall.req.lookup.d_name,
		dentry->d_name.name,
		PVFS2_NAME_LEN - 1);
	new_op->upcall.req.lookup.d_name[PVFS2_NAME_LEN - 1] = '\0';

	gossip_debug(GOSSIP_DCACHE_DEBUG,
		     "%s:%s:%d interrupt flag [%d]\n",
		     __FILE__,
		     __func__,
		     __LINE__,
		     get_interruptible_flag(parent_inode));

	err = service_operation(new_op, "pvfs2_lookup",
				get_interruptible_flag(parent_inode));
	if (err)
		goto out_drop;

	if (new_op->downcall.status != 0 ||
	    !match_handle(new_op->downcall.resp.lookup.refn.khandle, inode)) {
		gossip_debug(GOSSIP_DCACHE_DEBUG,
			     "%s:%s:%d "
			     "lookup failure |%s| or no match |%s|.\n",
			     __FILE__,
			     __func__,
			     __LINE__,
			     new_op->downcall.status ? "true" : "false",
			     match_handle(new_op->downcall.resp.lookup.refn.khandle,
					  inode) ? "false" : "true");
		gossip_debug(GOSSIP_DCACHE_DEBUG,
			     "%s:%s:%d revalidate failed\n",
			     __FILE__, __func__, __LINE__);
		goto out_drop;
	}

	/* lookup succeeded and the handle still matches */
	ret = 1;
	goto out_release_op;

out_drop:
	d_drop(dentry);
out_release_op:
	op_release(new_op);
out_put_parent:
	dput(parent_dentry);
	return ret;
}

/*
 * Verify that dentry is valid.
+ * + * Should return 1 if dentry can still be trusted, else 0 + */ +static int pvfs2_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct inode *inode; + int ret = 0; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n", + __func__, dentry); + + /* find inode from dentry */ + if (!dentry->d_inode) { + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n", + __func__); + goto invalid_exit; + } + + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode valid.\n", __func__); + inode = dentry->d_inode; + + /* + * first perform a lookup to make sure that the object not only + * exists, but is still in the expected place in the name space + */ + if (!is_root_handle(inode)) { + if (!pvfs2_revalidate_lookup(dentry)) + goto invalid_exit; + } else { + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: root handle, lookup skipped.\n", + __func__); + } + + /* now perform getattr */ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: doing getattr: inode: %p, handle: %pU\n", + __func__, + inode, + get_khandle_from_ino(inode)); + ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: getattr %s (ret = %d), returning %s for dentry i_count=%d\n", + __func__, + (ret == 0 ? "succeeded" : "failed"), + ret, + (ret == 0 ? "valid" : "INVALID"), + atomic_read(&inode->i_count)); + if (ret != 0) + goto invalid_exit; + + /* dentry is valid! */ + return 1; + +invalid_exit: + return 0; +} + +const struct dentry_operations pvfs2_dentry_operations = { + .d_revalidate = pvfs2_d_revalidate, +}; diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c new file mode 100644 index 000000000000..3e450228f3dc --- /dev/null +++ b/fs/orangefs/devpvfs2-req.c @@ -0,0 +1,997 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * Changes by Acxiom Corporation to add protocol version to kernel + * communication, Copyright Acxiom Corporation, 2005. 
+ * + * See COPYING in top-level directory. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-dev-proto.h" +#include "pvfs2-bufmap.h" + +#include +#include + +/* this file implements the /dev/pvfs2-req device node */ + +static int open_access_count; + +#define DUMP_DEVICE_ERROR() \ +do { \ + gossip_err("*****************************************************\n");\ + gossip_err("PVFS2 Device Error: You cannot open the device file "); \ + gossip_err("\n/dev/%s more than once. Please make sure that\nthere " \ + "are no ", PVFS2_REQDEVICE_NAME); \ + gossip_err("instances of a program using this device\ncurrently " \ + "running. (You must verify this!)\n"); \ + gossip_err("For example, you can use the lsof program as follows:\n");\ + gossip_err("'lsof | grep %s' (run this as root)\n", \ + PVFS2_REQDEVICE_NAME); \ + gossip_err(" open_access_count = %d\n", open_access_count); \ + gossip_err("*****************************************************\n");\ +} while (0) + +static int hash_func(__u64 tag, int table_size) +{ + return tag % ((unsigned int)table_size); +} + +static void pvfs2_devreq_add_op(struct pvfs2_kernel_op_s *op) +{ + int index = hash_func(op->tag, hash_table_size); + + spin_lock(&htable_ops_in_progress_lock); + list_add_tail(&op->list, &htable_ops_in_progress[index]); + spin_unlock(&htable_ops_in_progress_lock); +} + +static struct pvfs2_kernel_op_s *pvfs2_devreq_remove_op(__u64 tag) +{ + struct pvfs2_kernel_op_s *op, *next; + int index; + + index = hash_func(tag, hash_table_size); + + spin_lock(&htable_ops_in_progress_lock); + list_for_each_entry_safe(op, + next, + &htable_ops_in_progress[index], + list) { + if (op->tag == tag) { + list_del(&op->list); + spin_unlock(&htable_ops_in_progress_lock); + return op; + } + } + + spin_unlock(&htable_ops_in_progress_lock); + return NULL; +} + +static int pvfs2_devreq_open(struct inode *inode, struct file *file) +{ + int ret = -EINVAL; + + if (!(file->f_flags & O_NONBLOCK)) { + gossip_err("pvfs2: 
device cannot be opened in blocking mode\n"); + goto out; + } + ret = -EACCES; + gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: opening device\n"); + mutex_lock(&devreq_mutex); + + if (open_access_count == 0) { + ret = generic_file_open(inode, file); + if (ret == 0) + open_access_count++; + } else { + DUMP_DEVICE_ERROR(); + } + mutex_unlock(&devreq_mutex); + +out: + + gossip_debug(GOSSIP_DEV_DEBUG, + "pvfs2-client-core: open device complete (ret = %d)\n", + ret); + return ret; +} + +static ssize_t pvfs2_devreq_read(struct file *file, + char __user *buf, + size_t count, loff_t *offset) +{ + int ret = 0; + ssize_t len = 0; + struct pvfs2_kernel_op_s *cur_op = NULL; + static __s32 magic = PVFS2_DEVREQ_MAGIC; + __s32 proto_ver = PVFS_KERNEL_PROTO_VERSION; + + if (!(file->f_flags & O_NONBLOCK)) { + /* We do not support blocking reads/opens any more */ + gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n"); + return -EINVAL; + } else { + struct pvfs2_kernel_op_s *op = NULL, *temp = NULL; + /* get next op (if any) from top of list */ + spin_lock(&pvfs2_request_list_lock); + list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) { + __s32 fsid = fsid_of_op(op); + /* + * Check if this op's fsid is known and needs + * remounting + */ + if (fsid != PVFS_FS_ID_NULL && + fs_mount_pending(fsid) == 1) { + gossip_debug(GOSSIP_DEV_DEBUG, + "Skipping op tag %llu %s\n", + llu(op->tag), + get_opname_string(op)); + continue; + } else { + /* + * op does not belong to any particular fsid + * or already mounted.. let it through + */ + cur_op = op; + spin_lock(&cur_op->lock); + list_del(&cur_op->list); + cur_op->op_linger_tmp--; + /* + * if there is a trailer, re-add it to + * the request list. 
+ */ + if (cur_op->op_linger == 2 && + cur_op->op_linger_tmp == 1) { + if (cur_op->upcall.trailer_size <= 0 || + cur_op->upcall.trailer_buf == NULL) + gossip_err("BUG:trailer_size is %ld and trailer buf is %p\n", (long)cur_op->upcall.trailer_size, cur_op->upcall.trailer_buf); + /* re-add it to the head of the list */ + list_add(&cur_op->list, + &pvfs2_request_list); + } + spin_unlock(&cur_op->lock); + break; + } + } + spin_unlock(&pvfs2_request_list_lock); + } + + if (cur_op) { + spin_lock(&cur_op->lock); + + gossip_debug(GOSSIP_DEV_DEBUG, + "client-core: reading op tag %llu %s\n", + llu(cur_op->tag), get_opname_string(cur_op)); + if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { + if (cur_op->op_linger == 1) + gossip_err("WARNING: Current op already queued...skipping\n"); + } else if (cur_op->op_linger == 1 || + (cur_op->op_linger == 2 && + cur_op->op_linger_tmp == 0)) { + /* + * atomically move the operation to the + * htable_ops_in_progress + */ + set_op_state_inprogress(cur_op); + pvfs2_devreq_add_op(cur_op); + } + + spin_unlock(&cur_op->lock); + + /* 2 cases + * a) OPs with no trailers + * b) OPs with trailers, Stage 1 + * Either way push the upcall out + */ + if (cur_op->op_linger == 1 || + (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 1)) { + len = MAX_ALIGNED_DEV_REQ_UPSIZE; + if ((size_t) len <= count) { + ret = copy_to_user(buf, + &proto_ver, + sizeof(__s32)); + if (ret == 0) { + ret = copy_to_user(buf + sizeof(__s32), + &magic, + sizeof(__s32)); + if (ret == 0) { + ret = copy_to_user(buf+2 * sizeof(__s32), + &cur_op->tag, + sizeof(__u64)); + if (ret == 0) { + ret = copy_to_user( + buf + + 2 * + sizeof(__s32) + + sizeof(__u64), + &cur_op->upcall, + sizeof(struct pvfs2_upcall_s)); + } + } + } + + if (ret) { + gossip_err("Failed to copy data to user space\n"); + len = -EFAULT; + } + } else { + gossip_err + ("Failed to copy data to user space\n"); + len = -EIO; + } + } + /* Stage 2: Push the trailer out */ + else if (cur_op->op_linger == 
2 && cur_op->op_linger_tmp == 0) { + len = cur_op->upcall.trailer_size; + if ((size_t) len <= count) { + ret = copy_to_user(buf, + cur_op->upcall.trailer_buf, + len); + if (ret) { + gossip_err("Failed to copy trailer to user space\n"); + len = -EFAULT; + } + } else { + gossip_err("Read buffer for trailer is too small (%ld as opposed to %ld)\n", + (long)count, + (long)len); + len = -EIO; + } + } else { + gossip_err("cur_op: %p (op_linger %d), (op_linger_tmp %d), erroneous request list?\n", + cur_op, + cur_op->op_linger, + cur_op->op_linger_tmp); + len = 0; + } + } else if (file->f_flags & O_NONBLOCK) { + /* + * if in non-blocking mode, return EAGAIN since no requests are + * ready yet + */ + len = -EAGAIN; + } + return len; +} + +/* Function for writev() callers into the device */ +static ssize_t pvfs2_devreq_writev(struct file *file, + const struct iovec *iov, + size_t count, + loff_t *offset) +{ + struct pvfs2_kernel_op_s *op = NULL; + void *buffer = NULL; + void *ptr = NULL; + unsigned long i = 0; + static int max_downsize = MAX_ALIGNED_DEV_REQ_DOWNSIZE; + int ret = 0, num_remaining = max_downsize; + int notrailer_count = 4; /* num elements in iovec without trailer */ + int payload_size = 0; + __s32 magic = 0; + __s32 proto_ver = 0; + __u64 tag = 0; + ssize_t total_returned_size = 0; + + /* Either there is a trailer or there isn't */ + if (count != notrailer_count && count != (notrailer_count + 1)) { + gossip_err("Error: Number of iov vectors is (%ld) and notrailer count is %d\n", + count, + notrailer_count); + return -EPROTO; + } + buffer = dev_req_alloc(); + if (!buffer) + return -ENOMEM; + ptr = buffer; + + for (i = 0; i < notrailer_count; i++) { + if (iov[i].iov_len > num_remaining) { + gossip_err + ("writev error: Freeing buffer and returning\n"); + dev_req_release(buffer); + return -EMSGSIZE; + } + ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len); + if (ret) { + gossip_err("Failed to copy data from user space\n"); + dev_req_release(buffer); + 
return -EIO; + } + num_remaining -= iov[i].iov_len; + ptr += iov[i].iov_len; + payload_size += iov[i].iov_len; + } + total_returned_size = payload_size; + + /* these elements are currently 8 byte aligned (8 bytes for (version + + * magic) 8 bytes for tag). If you add another element, either + * make it 8 bytes big, or use get_unaligned when asigning. + */ + ptr = buffer; + proto_ver = *((__s32 *) ptr); + ptr += sizeof(__s32); + + magic = *((__s32 *) ptr); + ptr += sizeof(__s32); + + tag = *((__u64 *) ptr); + ptr += sizeof(__u64); + + if (magic != PVFS2_DEVREQ_MAGIC) { + gossip_err("Error: Device magic number does not match.\n"); + dev_req_release(buffer); + return -EPROTO; + } + + /* + * proto_ver = 20902 for 2.9.2 + */ + + op = pvfs2_devreq_remove_op(tag); + if (op) { + /* Increase ref count! */ + get_op(op); + /* cut off magic and tag from payload size */ + payload_size -= (2 * sizeof(__s32) + sizeof(__u64)); + if (payload_size <= sizeof(struct pvfs2_downcall_s)) + /* copy the passed in downcall into the op */ + memcpy(&op->downcall, + ptr, + sizeof(struct pvfs2_downcall_s)); + else + gossip_debug(GOSSIP_DEV_DEBUG, + "writev: Ignoring %d bytes\n", + payload_size); + + /* Do not allocate needlessly if client-core forgets + * to reset trailer size on op errors. + */ + if (op->downcall.status == 0 && op->downcall.trailer_size > 0) { + gossip_debug(GOSSIP_DEV_DEBUG, + "writev: trailer size %ld\n", + (unsigned long)op->downcall.trailer_size); + if (count != (notrailer_count + 1)) { + gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? 
(%ld)\n", (unsigned long)op->downcall.trailer_size, count); + dev_req_release(buffer); + put_op(op); + return -EPROTO; + } + if (iov[notrailer_count].iov_len > + op->downcall.trailer_size) { + gossip_err("writev error: trailer size (%ld) != iov_len (%ld)\n", (unsigned long)op->downcall.trailer_size, (unsigned long)iov[notrailer_count].iov_len); + dev_req_release(buffer); + put_op(op); + return -EMSGSIZE; + } + /* Allocate a buffer large enough to hold the + * trailer bytes. + */ + op->downcall.trailer_buf = + vmalloc(op->downcall.trailer_size); + if (op->downcall.trailer_buf != NULL) { + gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", + op->downcall.trailer_buf); + ret = copy_from_user(op->downcall.trailer_buf, + iov[notrailer_count]. + iov_base, + iov[notrailer_count]. + iov_len); + if (ret) { + gossip_err("Failed to copy trailer data from user space\n"); + dev_req_release(buffer); + gossip_debug(GOSSIP_DEV_DEBUG, + "vfree: %p\n", + op->downcall.trailer_buf); + vfree(op->downcall.trailer_buf); + op->downcall.trailer_buf = NULL; + put_op(op); + return -EIO; + } + } else { + /* Change downcall status */ + op->downcall.status = -ENOMEM; + gossip_err("writev: could not vmalloc for trailer!\n"); + } + } + + /* if this operation is an I/O operation and if it was + * initiated on behalf of a *synchronous* VFS I/O operation, + * only then we need to wait + * for all data to be copied before we can return to avoid + * buffer corruption and races that can pull the buffers + * out from under us. + * + * Essentially we're synchronizing with other parts of the + * vfs implicitly by not allowing the user space + * application reading/writing this device to return until + * the buffers are done being used. 
+ */ + if ((op->upcall.type == PVFS2_VFS_OP_FILE_IO && + op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) || + op->upcall.type == PVFS2_VFS_OP_FILE_IOX) { + int timed_out = 0; + DECLARE_WAITQUEUE(wait_entry, current); + + /* tell the vfs op waiting on a waitqueue + * that this op is done + */ + spin_lock(&op->lock); + set_op_state_serviced(op); + spin_unlock(&op->lock); + + add_wait_queue_exclusive(&op->io_completion_waitq, + &wait_entry); + wake_up_interruptible(&op->waitq); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock(&op->lock); + if (op->io_completed) { + spin_unlock(&op->lock); + break; + } + spin_unlock(&op->lock); + + if (!signal_pending(current)) { + int timeout = + MSECS_TO_JIFFIES(1000 * + op_timeout_secs); + if (!schedule_timeout(timeout)) { + gossip_debug(GOSSIP_DEV_DEBUG, "*** I/O wait time is up\n"); + timed_out = 1; + break; + } + continue; + } + + gossip_debug(GOSSIP_DEV_DEBUG, "*** signal on I/O wait -- aborting\n"); + break; + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&op->io_completion_waitq, + &wait_entry); + + /* NOTE: for I/O operations we handle releasing the op + * object except in the case of timeout. the reason we + * can't free the op in timeout cases is that the op + * service logic in the vfs retries operations using + * the same op ptr, thus it can't be freed. + */ + if (!timed_out) + op_release(op); + } else { + + /* + * tell the vfs op waiting on a waitqueue that + * this op is done + */ + spin_lock(&op->lock); + set_op_state_serviced(op); + spin_unlock(&op->lock); + /* + for every other operation (i.e. 
non-I/O), we need to + wake up the callers for downcall completion + notification + */ + wake_up_interruptible(&op->waitq); + } + } else { + /* ignore downcalls that we're not interested in */ + gossip_debug(GOSSIP_DEV_DEBUG, + "WARNING: No one's waiting for tag %llu\n", + llu(tag)); + } + dev_req_release(buffer); + + return total_returned_size; +} + +static ssize_t pvfs2_devreq_write_iter(struct kiocb *iocb, + struct iov_iter *iter) +{ + return pvfs2_devreq_writev(iocb->ki_filp, + iter->iov, + iter->nr_segs, + &iocb->ki_pos); +} + +/* Returns whether any FS are still pending remounted */ +static int mark_all_pending_mounts(void) +{ + int unmounted = 1; + struct pvfs2_sb_info_s *pvfs2_sb = NULL; + + spin_lock(&pvfs2_superblocks_lock); + list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) { + /* All of these file system require a remount */ + pvfs2_sb->mount_pending = 1; + unmounted = 0; + } + spin_unlock(&pvfs2_superblocks_lock); + return unmounted; +} + +/* + * Determine if a given file system needs to be remounted or not + * Returns -1 on error + * 0 if already mounted + * 1 if needs remount + */ +int fs_mount_pending(__s32 fsid) +{ + int mount_pending = -1; + struct pvfs2_sb_info_s *pvfs2_sb = NULL; + + spin_lock(&pvfs2_superblocks_lock); + list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) { + if (pvfs2_sb->fs_id == fsid) { + mount_pending = pvfs2_sb->mount_pending; + break; + } + } + spin_unlock(&pvfs2_superblocks_lock); + return mount_pending; +} + +/* + * NOTE: gets called when the last reference to this device is dropped. + * Using the open_access_count variable, we enforce a reference count + * on this file so that it can be opened by only one process at a time. 
+ * the devreq_mutex is used to make sure all i/o has completed + * before we call pvfs_bufmap_finalize, and similar such tricky + * situations + */ +static int pvfs2_devreq_release(struct inode *inode, struct file *file) +{ + int unmounted = 0; + + gossip_debug(GOSSIP_DEV_DEBUG, + "%s:pvfs2-client-core: exiting, closing device\n", + __func__); + + mutex_lock(&devreq_mutex); + pvfs_bufmap_finalize(); + + open_access_count--; + + unmounted = mark_all_pending_mounts(); + gossip_debug(GOSSIP_DEV_DEBUG, "PVFS2 Device Close: Filesystem(s) %s\n", + (unmounted ? "UNMOUNTED" : "MOUNTED")); + mutex_unlock(&devreq_mutex); + + /* + * Walk through the list of ops in the request list, mark them + * as purged and wake them up. + */ + purge_waiting_ops(); + /* + * Walk through the hash table of in progress operations; mark + * them as purged and wake them up + */ + purge_inprogress_ops(); + gossip_debug(GOSSIP_DEV_DEBUG, + "pvfs2-client-core: device close complete\n"); + return 0; +} + +int is_daemon_in_service(void) +{ + int in_service; + + /* + * What this function does is checks if client-core is alive + * based on the access count we maintain on the device. + */ + mutex_lock(&devreq_mutex); + in_service = open_access_count == 1 ? 0 : -EIO; + mutex_unlock(&devreq_mutex); + return in_service; +} + +static inline long check_ioctl_command(unsigned int command) +{ + /* Check for valid ioctl codes */ + if (_IOC_TYPE(command) != PVFS_DEV_MAGIC) { + gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? 
[cmd %x, magic %x != %x]\n", + command, + _IOC_TYPE(command), + PVFS_DEV_MAGIC); + return -EINVAL; + } + /* and valid ioctl commands */ + if (_IOC_NR(command) >= PVFS_DEV_MAXNR || _IOC_NR(command) <= 0) { + gossip_err("Invalid ioctl command number [%d >= %d]\n", + _IOC_NR(command), PVFS_DEV_MAXNR); + return -ENOIOCTLCMD; + } + return 0; +} + +static long dispatch_ioctl_command(unsigned int command, unsigned long arg) +{ + static __s32 magic = PVFS2_DEVREQ_MAGIC; + static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE; + static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE; + struct PVFS_dev_map_desc user_desc; + int ret = 0; + struct dev_mask_info_s mask_info = { 0 }; + struct dev_mask2_info_s mask2_info = { 0, 0 }; + int upstream_kmod = 1; + struct list_head *tmp = NULL; + struct pvfs2_sb_info_s *pvfs2_sb = NULL; + + /* mtmoore: add locking here */ + + switch (command) { + case PVFS_DEV_GET_MAGIC: + return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ? + -EIO : + 0); + case PVFS_DEV_GET_MAX_UPSIZE: + return ((put_user(max_up_size, + (__s32 __user *) arg) == -EFAULT) ? + -EIO : + 0); + case PVFS_DEV_GET_MAX_DOWNSIZE: + return ((put_user(max_down_size, + (__s32 __user *) arg) == -EFAULT) ? + -EIO : + 0); + case PVFS_DEV_MAP: + ret = copy_from_user(&user_desc, + (struct PVFS_dev_map_desc __user *) + arg, + sizeof(struct PVFS_dev_map_desc)); + return ret ? -EIO : pvfs_bufmap_initialize(&user_desc); + case PVFS_DEV_REMOUNT_ALL: + gossip_debug(GOSSIP_DEV_DEBUG, + "pvfs2_devreq_ioctl: got PVFS_DEV_REMOUNT_ALL\n"); + + /* + * remount all mounted pvfs2 volumes to regain the lost + * dynamic mount tables (if any) -- NOTE: this is done + * without keeping the superblock list locked due to the + * upcall/downcall waiting. 
also, the request semaphore is + * used to ensure that no operations will be serviced until + * all of the remounts are serviced (to avoid ops between + * mounts to fail) + */ + ret = mutex_lock_interruptible(&request_mutex); + if (ret < 0) + return ret; + gossip_debug(GOSSIP_DEV_DEBUG, + "pvfs2_devreq_ioctl: priority remount in progress\n"); + list_for_each(tmp, &pvfs2_superblocks) { + pvfs2_sb = + list_entry(tmp, struct pvfs2_sb_info_s, list); + if (pvfs2_sb && (pvfs2_sb->sb)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "Remounting SB %p\n", + pvfs2_sb); + + ret = pvfs2_remount(pvfs2_sb->sb); + if (ret) { + gossip_debug(GOSSIP_DEV_DEBUG, + "SB %p remount failed\n", + pvfs2_sb); + break; + } + } + } + gossip_debug(GOSSIP_DEV_DEBUG, + "pvfs2_devreq_ioctl: priority remount complete\n"); + mutex_unlock(&request_mutex); + return ret; + + case PVFS_DEV_UPSTREAM: + ret = copy_to_user((void __user *)arg, + &upstream_kmod, + sizeof(upstream_kmod)); + + if (ret != 0) + return -EIO; + else + return ret; + + case PVFS_DEV_CLIENT_MASK: + ret = copy_from_user(&mask2_info, + (void __user *)arg, + sizeof(struct dev_mask2_info_s)); + + if (ret != 0) + return -EIO; + + client_debug_mask.mask1 = mask2_info.mask1_value; + client_debug_mask.mask2 = mask2_info.mask2_value; + + pr_info("%s: client debug mask has been been received " + ":%llx: :%llx:\n", + __func__, + (unsigned long long)client_debug_mask.mask1, + (unsigned long long)client_debug_mask.mask2); + + return ret; + + case PVFS_DEV_CLIENT_STRING: + ret = copy_from_user(&client_debug_array_string, + (void __user *)arg, + PVFS2_MAX_DEBUG_STRING_LEN); + if (ret != 0) { + pr_info("%s: " + "PVFS_DEV_CLIENT_STRING: copy_from_user failed" + "\n", + __func__); + return -EIO; + } + + pr_info("%s: client debug array string has been been received." + "\n", + __func__); + + if (!help_string_initialized) { + + /* Free the "we don't know yet" default string... 
*/ + kfree(debug_help_string); + + /* build a proper debug help string */ + if (orangefs_prepare_debugfs_help_string(0)) { + gossip_err("%s: " + "prepare_debugfs_help_string failed" + "\n", + __func__); + return -EIO; + } + + /* Replace the boilerplate boot-time debug-help file. */ + debugfs_remove(help_file_dentry); + + help_file_dentry = + debugfs_create_file( + ORANGEFS_KMOD_DEBUG_HELP_FILE, + 0444, + debug_dir, + debug_help_string, + &debug_help_fops); + + if (!help_file_dentry) { + gossip_err("%s: debugfs_create_file failed for" + " :%s:!\n", + __func__, + ORANGEFS_KMOD_DEBUG_HELP_FILE); + return -EIO; + } + } + + debug_mask_to_string(&client_debug_mask, 1); + + debugfs_remove(client_debug_dentry); + + pvfs2_client_debug_init(); + + help_string_initialized++; + + return ret; + + case PVFS_DEV_DEBUG: + ret = copy_from_user(&mask_info, + (void __user *)arg, + sizeof(mask_info)); + + if (ret != 0) + return -EIO; + + if (mask_info.mask_type == KERNEL_MASK) { + if ((mask_info.mask_value == 0) + && (kernel_mask_set_mod_init)) { + /* + * the kernel debug mask was set when the + * kernel module was loaded; don't override + * it if the client-core was started without + * a value for PVFS2_KMODMASK. 
+ */ + return 0; + } + debug_mask_to_string(&mask_info.mask_value, + mask_info.mask_type); + gossip_debug_mask = mask_info.mask_value; + pr_info("PVFS: kernel debug mask has been modified to " + ":%s: :%llx:\n", + kernel_debug_string, + (unsigned long long)gossip_debug_mask); + } else if (mask_info.mask_type == CLIENT_MASK) { + debug_mask_to_string(&mask_info.mask_value, + mask_info.mask_type); + pr_info("PVFS: client debug mask has been modified to" + ":%s: :%llx:\n", + client_debug_string, + llu(mask_info.mask_value)); + } else { + gossip_lerr("Invalid mask type....\n"); + return -EINVAL; + } + + return ret; + + default: + return -ENOIOCTLCMD; + } + return -ENOIOCTLCMD; +} + +static long pvfs2_devreq_ioctl(struct file *file, + unsigned int command, unsigned long arg) +{ + long ret; + + /* Check for properly constructed commands */ + ret = check_ioctl_command(command); + if (ret < 0) + return (int)ret; + + return (int)dispatch_ioctl_command(command, arg); +} + +#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ + +/* Compat structure for the PVFS_DEV_MAP ioctl */ +struct PVFS_dev_map_desc32 { + compat_uptr_t ptr; + __s32 total_size; + __s32 size; + __s32 count; +}; + +static unsigned long translate_dev_map26(unsigned long args, long *error) +{ + struct PVFS_dev_map_desc32 __user *p32 = (void __user *)args; + /* + * Depending on the architecture, allocate some space on the + * user-call-stack based on our expected layout. 
+ */ + struct PVFS_dev_map_desc __user *p = + compat_alloc_user_space(sizeof(*p)); + u32 addr; + + *error = 0; + /* get the ptr from the 32 bit user-space */ + if (get_user(addr, &p32->ptr)) + goto err; + /* try to put that into a 64-bit layout */ + if (put_user(compat_ptr(addr), &p->ptr)) + goto err; + /* copy the remaining fields */ + if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32))) + goto err; + if (copy_in_user(&p->size, &p32->size, sizeof(__s32))) + goto err; + if (copy_in_user(&p->count, &p32->count, sizeof(__s32))) + goto err; + return (unsigned long)p; +err: + *error = -EFAULT; + return 0; +} + +/* + * 32 bit user-space apps' ioctl handlers when kernel modules + * is compiled as a 64 bit one + */ +static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long args) +{ + long ret; + unsigned long arg = args; + + /* Check for properly constructed commands */ + ret = check_ioctl_command(cmd); + if (ret < 0) + return ret; + if (cmd == PVFS_DEV_MAP) { + /* + * convert the arguments to what we expect internally + * in kernel space + */ + arg = translate_dev_map26(args, &ret); + if (ret < 0) { + gossip_err("Could not translate dev map\n"); + return ret; + } + } + /* no other ioctl requires translation */ + return dispatch_ioctl_command(cmd, arg); +} + +static int pvfs2_ioctl32_init(void) +{ + return 0; +} + +static void pvfs2_ioctl32_cleanup(void) +{ + return; +} + +#endif /* CONFIG_COMPAT is in .config */ + +/* the assigned character device major number */ +static int pvfs2_dev_major; + +/* + * Initialize pvfs2 device specific state: + * Must be called at module load time only + */ +int pvfs2_dev_init(void) +{ + int ret; + + /* register the ioctl32 sub-system */ + ret = pvfs2_ioctl32_init(); + if (ret < 0) + return ret; + + /* register pvfs2-req device */ + pvfs2_dev_major = register_chrdev(0, + PVFS2_REQDEVICE_NAME, + &pvfs2_devreq_file_operations); + if (pvfs2_dev_major < 0) { + gossip_debug(GOSSIP_DEV_DEBUG, + 
"Failed to register /dev/%s (error %d)\n", + PVFS2_REQDEVICE_NAME, pvfs2_dev_major); + pvfs2_ioctl32_cleanup(); + return pvfs2_dev_major; + } + + gossip_debug(GOSSIP_DEV_DEBUG, + "*** /dev/%s character device registered ***\n", + PVFS2_REQDEVICE_NAME); + gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n", + PVFS2_REQDEVICE_NAME, pvfs2_dev_major); + return 0; +} + +void pvfs2_dev_cleanup(void) +{ + unregister_chrdev(pvfs2_dev_major, PVFS2_REQDEVICE_NAME); + gossip_debug(GOSSIP_DEV_DEBUG, + "*** /dev/%s character device unregistered ***\n", + PVFS2_REQDEVICE_NAME); + /* unregister the ioctl32 sub-system */ + pvfs2_ioctl32_cleanup(); +} + +static unsigned int pvfs2_devreq_poll(struct file *file, + struct poll_table_struct *poll_table) +{ + int poll_revent_mask = 0; + + if (open_access_count == 1) { + poll_wait(file, &pvfs2_request_list_waitq, poll_table); + + spin_lock(&pvfs2_request_list_lock); + if (!list_empty(&pvfs2_request_list)) + poll_revent_mask |= POLL_IN; + spin_unlock(&pvfs2_request_list_lock); + } + return poll_revent_mask; +} + +const struct file_operations pvfs2_devreq_file_operations = { + .owner = THIS_MODULE, + .read = pvfs2_devreq_read, + .write_iter = pvfs2_devreq_write_iter, + .open = pvfs2_devreq_open, + .release = pvfs2_devreq_release, + .unlocked_ioctl = pvfs2_devreq_ioctl, + +#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ + .compat_ioctl = pvfs2_devreq_compat_ioctl, +#endif + .poll = pvfs2_devreq_poll +}; diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c new file mode 100644 index 000000000000..9b5f4bb17874 --- /dev/null +++ b/fs/orangefs/dir.c @@ -0,0 +1,394 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +struct readdir_handle_s { + int buffer_index; + struct pvfs2_readdir_response_s readdir_response; + void *dents_buf; +}; + +/* + * decode routine needed by kmod to make sense of the shared page for readdirs. + */ +static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) +{ + int i; + struct pvfs2_readdir_response_s *rd = + (struct pvfs2_readdir_response_s *) ptr; + char *buf = ptr; + char **pptr = &buf; + + readdir->token = rd->token; + readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; + readdir->dirent_array = kmalloc(readdir->pvfs_dirent_outcount * + sizeof(*readdir->dirent_array), + GFP_KERNEL); + if (readdir->dirent_array == NULL) + return -ENOMEM; + *pptr += offsetof(struct pvfs2_readdir_response_s, dirent_array); + for (i = 0; i < readdir->pvfs_dirent_outcount; i++) { + dec_string(pptr, &readdir->dirent_array[i].d_name, + &readdir->dirent_array[i].d_length); + readdir->dirent_array[i].khandle = + *(struct pvfs2_khandle *) *pptr; + *pptr += 16; + } + return (unsigned long)*pptr - (unsigned long)ptr; +} + +static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, + int buffer_index) +{ + long ret; + + if (buf == NULL) { + gossip_err + ("Invalid NULL buffer specified in readdir_handle_ctor\n"); + return -ENOMEM; + } + if (buffer_index < 0) { + gossip_err + ("Invalid buffer index specified in readdir_handle_ctor\n"); + return -EINVAL; + } + rhandle->buffer_index = buffer_index; + rhandle->dents_buf = buf; + ret = decode_dirents(buf, &rhandle->readdir_response); + if (ret < 0) { + gossip_err("Could not decode readdir from buffer %ld\n", ret); + rhandle->buffer_index = -1; + gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); + vfree(buf); + rhandle->dents_buf = NULL; + } + return ret; +} + +static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, + struct readdir_handle_s *rhandle) +{ + if (rhandle == NULL) + return; + + /* 
kfree(NULL) is safe */ + kfree(rhandle->readdir_response.dirent_array); + rhandle->readdir_response.dirent_array = NULL; + + if (rhandle->buffer_index >= 0) { + readdir_index_put(bufmap, rhandle->buffer_index); + rhandle->buffer_index = -1; + } + if (rhandle->dents_buf) { + gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", + rhandle->dents_buf); + vfree(rhandle->dents_buf); + rhandle->dents_buf = NULL; + } +} + +/* + * Read directory entries from an instance of an open directory. + * + * \note This routine was converted for the readdir to iterate change + * in "struct file_operations". "converted" mostly amounts to + * changing occurrences of "readdir" and "filldir" in the + * comments to "iterate" and "dir_emit". Also filldir calls + * were changed to dir_emit calls. + * + * \param dir_emit callback function called for each entry read. + * + * \retval <0 on error + * \retval 0 when directory has been completely traversed + * \retval >0 if we don't call dir_emit for all entries + * + * \note If the dir_emit call-back returns non-zero, then iterate should + * assume that it has had enough, and should return as well. + */ +static int pvfs2_readdir(struct file *file, struct dir_context *ctx) +{ + struct pvfs2_bufmap *bufmap = NULL; + int ret = 0; + int buffer_index; + __u64 *ptoken = file->private_data; + __u64 pos = 0; + ino_t ino = 0; + struct dentry *dentry = file->f_path.dentry; + struct pvfs2_kernel_op_s *new_op = NULL; + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode); + int buffer_full = 0; + struct readdir_handle_s rhandle; + int i = 0; + int len = 0; + ino_t current_ino = 0; + char *current_entry = NULL; + long bytes_decoded; + + gossip_ldebug(GOSSIP_DIR_DEBUG, + "%s: ctx->pos:%lld, token = %llu\n", + __func__, + lld(ctx->pos), + llu(*ptoken)); + + pos = (__u64) ctx->pos; + + /* are we done? 
*/ + if (pos == PVFS_READDIR_END) { + gossip_debug(GOSSIP_DIR_DEBUG, + "Skipping to termination path\n"); + return 0; + } + + gossip_debug(GOSSIP_DIR_DEBUG, + "pvfs2_readdir called on %s (pos=%llu)\n", + dentry->d_name.name, llu(pos)); + + rhandle.buffer_index = -1; + rhandle.dents_buf = NULL; + memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); + + new_op = op_alloc(PVFS2_VFS_OP_READDIR); + if (!new_op) + return -ENOMEM; + + new_op->uses_shared_memory = 1; + new_op->upcall.req.readdir.refn = pvfs2_inode->refn; + new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; + + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: upcall.req.readdir.refn.khandle: %pU\n", + __func__, + &new_op->upcall.req.readdir.refn.khandle); + + /* + * NOTE: the position we send to the readdir upcall is out of + * sync with ctx->pos since: + * 1. pvfs2 doesn't include the "." and ".." entries that are + * added below. + * 2. the introduction of distributed directory logic makes token no + * longer be related to f_pos and pos. Instead an independent + * variable is used inside the function and stored in the + * private_data of the file structure. + */ + new_op->upcall.req.readdir.token = *ptoken; + +get_new_buffer_index: + ret = readdir_index_get(&bufmap, &buffer_index); + if (ret < 0) { + gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n", + ret); + goto out_free_op; + } + new_op->upcall.req.readdir.buf_index = buffer_index; + + ret = service_operation(new_op, + "pvfs2_readdir", + get_interruptible_flag(dentry->d_inode)); + + gossip_debug(GOSSIP_DIR_DEBUG, + "Readdir downcall status is %d. ret:%d\n", + new_op->downcall.status, + ret); + + if (ret == -EAGAIN && op_state_purged(new_op)) { + /* + * readdir shared memory aread has been wiped due to + * pvfs2-client-core restarting, so we must get a new + * index into the shared memory. 
+ */ + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: Getting new buffer_index for retry of readdir..\n", + __func__); + readdir_index_put(bufmap, buffer_index); + goto get_new_buffer_index; + } + + if (ret == -EIO && op_state_purged(new_op)) { + gossip_err("%s: Client is down. Aborting readdir call.\n", + __func__); + readdir_index_put(bufmap, buffer_index); + goto out_free_op; + } + + if (ret < 0 || new_op->downcall.status != 0) { + gossip_debug(GOSSIP_DIR_DEBUG, + "Readdir request failed. Status:%d\n", + new_op->downcall.status); + readdir_index_put(bufmap, buffer_index); + if (ret >= 0) + ret = new_op->downcall.status; + goto out_free_op; + } + + bytes_decoded = + readdir_handle_ctor(&rhandle, + new_op->downcall.trailer_buf, + buffer_index); + if (bytes_decoded < 0) { + gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n", + ret); + ret = bytes_decoded; + readdir_index_put(bufmap, buffer_index); + goto out_free_op; + } + + if (bytes_decoded != new_op->downcall.trailer_size) { + gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n", + bytes_decoded, + (long)new_op->downcall.trailer_size); + ret = -EINVAL; + goto out_destroy_handle; + } + + if (pos == 0) { + ino = get_ino_from_khandle(dentry->d_inode); + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: calling dir_emit of \".\" with pos = %llu\n", + __func__, + llu(pos)); + ret = dir_emit(ctx, ".", 1, ino, DT_DIR); + if (ret < 0) + goto out_destroy_handle; + ctx->pos++; + gossip_ldebug(GOSSIP_DIR_DEBUG, + "%s: ctx->pos:%lld\n", + __func__, + lld(ctx->pos)); + pos++; + } + + if (pos == 1) { + ino = get_parent_ino_from_dentry(dentry); + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: calling dir_emit of \"..\" with pos = %llu\n", + __func__, + llu(pos)); + ret = dir_emit(ctx, "..", 2, ino, DT_DIR); + if (ret < 0) + goto out_destroy_handle; + ctx->pos++; + gossip_ldebug(GOSSIP_DIR_DEBUG, + "%s: ctx->pos:%lld\n", + __func__, + lld(ctx->pos)); + pos++; + } + + for (i = 0; i < 
rhandle.readdir_response.pvfs_dirent_outcount; i++) { + len = rhandle.readdir_response.dirent_array[i].d_length; + current_entry = rhandle.readdir_response.dirent_array[i].d_name; + current_ino = pvfs2_khandle_to_ino( + &(rhandle.readdir_response.dirent_array[i].khandle)); + + gossip_debug(GOSSIP_DIR_DEBUG, + "calling dir_emit for %s with len %d, pos %ld\n", + current_entry, + len, + (unsigned long)pos); + ret = + dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); + if (ret < 0) { + gossip_debug(GOSSIP_DIR_DEBUG, + "dir_emit() failed. ret:%d\n", + ret); + if (i < 2) { + gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n"); + gossip_err("Duplicate entries may appear.\n"); + } + buffer_full = 1; + break; + } + ctx->pos++; + gossip_ldebug(GOSSIP_DIR_DEBUG, + "%s: ctx->pos:%lld\n", + __func__, + lld(ctx->pos)); + + pos++; + } + + /* this means that all of the dir_emit calls succeeded */ + if (i == rhandle.readdir_response.pvfs_dirent_outcount) { + /* update token */ + *ptoken = rhandle.readdir_response.token; + } else { + /* this means a dir_emit call failed */ + if (rhandle.readdir_response.token == PVFS_READDIR_END) { + /* + * If PVFS hit end of directory, then there + * is no way to do math on the token that it + * returned. Instead we go by ctx->pos but + * back up to account for the artificial . + * and .. entries. + */ + ctx->pos -= 3; + } else { + /* + * this means a dir_emit call failed. !!! need to set + * back to previous ctx->pos, no middle value allowed + */ + pos -= (i - 1); + ctx->pos -= (i - 1); + } + gossip_debug(GOSSIP_DIR_DEBUG, + "at least one dir_emit call failed. Setting ctx->pos to: %lld\n", + lld(ctx->pos)); + } + + /* + * Did we hit the end of the directory? 
+ */ + if (rhandle.readdir_response.token == PVFS_READDIR_END && + !buffer_full) { + gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); + ctx->pos = PVFS_READDIR_END; + } + + gossip_debug(GOSSIP_DIR_DEBUG, + "pos = %llu, token = %llu" + ", ctx->pos should have been %lld\n", + llu(pos), + llu(*ptoken), + lld(ctx->pos)); + +out_destroy_handle: + readdir_handle_dtor(bufmap, &rhandle); +out_free_op: + op_release(new_op); + gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret); + return ret; +} + +static int pvfs2_dir_open(struct inode *inode, struct file *file) +{ + __u64 *ptoken; + + file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + + ptoken = file->private_data; + *ptoken = PVFS_READDIR_START; + return 0; +} + +static int pvfs2_dir_release(struct inode *inode, struct file *file) +{ + pvfs2_flush_inode(inode); + kfree(file->private_data); + return 0; +} + +/** PVFS2 implementation of VFS directory operations */ +const struct file_operations pvfs2_dir_operations = { + .read = generic_read_dir, + .iterate = pvfs2_readdir, + .open = pvfs2_dir_open, + .release = pvfs2_dir_release, +}; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c new file mode 100644 index 000000000000..8e26f9fac289 --- /dev/null +++ b/fs/orangefs/file.c @@ -0,0 +1,1019 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * Linux VFS file operations. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" +#include +#include + +#define wake_up_daemon_for_return(op) \ +do { \ + spin_lock(&op->lock); \ + op->io_completed = 1; \ + spin_unlock(&op->lock); \ + wake_up_interruptible(&op->io_completion_waitq);\ +} while (0) + +/* + * Copy to client-core's address space from the buffers specified + * by the iovec upto total_size bytes. 
+ * NOTE: the iovector can either contain addresses which + * can futher be kernel-space or user-space addresses. + * or it can pointers to struct page's + */ +static int precopy_buffers(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *vec, + unsigned long nr_segs, + size_t total_size, + int from_user) +{ + int ret = 0; + + /* + * copy data from application/kernel by pulling it out + * of the iovec. + */ + /* Are we copying from User Virtual Addresses? */ + if (from_user) + ret = pvfs_bufmap_copy_iovec_from_user( + bufmap, + buffer_index, + vec, + nr_segs, + total_size); + /* Are we copying from Kernel Virtual Addresses? */ + else + ret = pvfs_bufmap_copy_iovec_from_kernel( + bufmap, + buffer_index, + vec, + nr_segs, + total_size); + if (ret < 0) + gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", + __func__, + (long)ret); + return ret; +} + +/* + * Copy from client-core's address space to the buffers specified + * by the iovec upto total_size bytes. + * NOTE: the iovector can either contain addresses which + * can futher be kernel-space or user-space addresses. + * or it can pointers to struct page's + */ +static int postcopy_buffers(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *vec, + int nr_segs, + size_t total_size, + int to_user) +{ + int ret = 0; + + /* + * copy data to application/kernel by pushing it out to + * the iovec. NOTE; target buffers can be addresses or + * struct page pointers. + */ + if (total_size) { + /* Are we copying to User Virtual Addresses? */ + if (to_user) + ret = pvfs_bufmap_copy_to_user_iovec( + bufmap, + buffer_index, + vec, + nr_segs, + total_size); + /* Are we copying to Kern Virtual Addresses? */ + else + ret = pvfs_bufmap_copy_to_kernel_iovec( + bufmap, + buffer_index, + vec, + nr_segs, + total_size); + if (ret < 0) + gossip_err("%s: Failed to copy-out buffers. 
Please make sure that the pvfs2-client is running (%ld)\n", + __func__, + (long)ret); + } + return ret; +} + +/* + * Post and wait for the I/O upcall to finish + */ +static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode, + loff_t *offset, struct iovec *vec, unsigned long nr_segs, + size_t total_size, loff_t readahead_size, int to_user) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; + struct pvfs2_bufmap *bufmap = NULL; + struct pvfs2_kernel_op_s *new_op = NULL; + int buffer_index = -1; + ssize_t ret; + + new_op = op_alloc(PVFS2_VFS_OP_FILE_IO); + if (!new_op) { + ret = -ENOMEM; + goto out; + } + /* synchronous I/O */ + new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO; + new_op->upcall.req.io.readahead_size = readahead_size; + new_op->upcall.req.io.io_type = type; + new_op->upcall.req.io.refn = pvfs2_inode->refn; + +populate_shared_memory: + /* get a shared buffer index */ + ret = pvfs_bufmap_get(&bufmap, &buffer_index); + if (ret < 0) { + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: pvfs_bufmap_get failure (%ld)\n", + __func__, (long)ret); + goto out; + } + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): GET op %p -> buffer_index %d\n", + __func__, + handle, + new_op, + buffer_index); + + new_op->uses_shared_memory = 1; + new_op->upcall.req.io.buf_index = buffer_index; + new_op->upcall.req.io.count = total_size; + new_op->upcall.req.io.offset = *offset; + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): copy_to_user %d nr_segs %lu, offset: %llu total_size: %zd\n", + __func__, + handle, + to_user, + nr_segs, + llu(*offset), + total_size); + /* + * Stage 1: copy the buffers into client-core's address space + * precopy_buffers only pertains to writes. 
+ */ + if (type == PVFS_IO_WRITE) { + ret = precopy_buffers(bufmap, + buffer_index, + vec, + nr_segs, + total_size, + to_user); + if (ret < 0) + goto out; + } + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): Calling post_io_request with tag (%llu)\n", + __func__, + handle, + llu(new_op->tag)); + + /* Stage 2: Service the I/O operation */ + ret = service_operation(new_op, + type == PVFS_IO_WRITE ? + "file_write" : + "file_read", + get_interruptible_flag(inode)); + + /* + * If service_operation() returns -EAGAIN #and# the operation was + * purged from pvfs2_request_list or htable_ops_in_progress, then + * we know that the client was restarted, causing the shared memory + * area to be wiped clean. To restart a write operation in this + * case, we must re-copy the data from the user's iovec to a NEW + * shared memory location. To restart a read operation, we must get + * a new shared memory location. + */ + if (ret == -EAGAIN && op_state_purged(new_op)) { + pvfs_bufmap_put(bufmap, buffer_index); + gossip_debug(GOSSIP_FILE_DEBUG, + "%s:going to repopulate_shared_memory.\n", + __func__); + goto populate_shared_memory; + } + + if (ret < 0) { + handle_io_error(); /* defined in pvfs2-kernel.h */ + /* + don't write an error to syslog on signaled operation + termination unless we've got debugging turned on, as + this can happen regularly (i.e. ctrl-c) + */ + if (ret == -EINTR) + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: returning error %ld\n", __func__, + (long)ret); + else + gossip_err("%s: error in %s handle %pU, returning %zd\n", + __func__, + type == PVFS_IO_READ ? + "read from" : "write to", + handle, ret); + goto out; + } + + /* + * Stage 3: Post copy buffers from client-core's address space + * postcopy_buffers only pertains to reads. 
+ */ + if (type == PVFS_IO_READ) { + ret = postcopy_buffers(bufmap, + buffer_index, + vec, + nr_segs, + new_op->downcall.resp.io.amt_complete, + to_user); + if (ret < 0) { + /* + * put error codes in downcall so that handle_io_error() + * preserves it properly + */ + new_op->downcall.status = ret; + handle_io_error(); + goto out; + } + } + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): Amount written as returned by the sys-io call:%d\n", + __func__, + handle, + (int)new_op->downcall.resp.io.amt_complete); + + ret = new_op->downcall.resp.io.amt_complete; + + /* + tell the device file owner waiting on I/O that this read has + completed and it can return now. in this exact case, on + wakeup the daemon will free the op, so we *cannot* touch it + after this. + */ + wake_up_daemon_for_return(new_op); + new_op = NULL; + +out: + if (buffer_index >= 0) { + pvfs_bufmap_put(bufmap, buffer_index); + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): PUT buffer_index %d\n", + __func__, handle, buffer_index); + buffer_index = -1; + } + if (new_op) { + op_release(new_op); + new_op = NULL; + } + return ret; +} + +/* + * The reason we need to do this is to be able to support readv and writev + * that are larger than (pvfs_bufmap_size_query()) Default is + * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB. What that means is that we will + * create a new io vec descriptor for those memory addresses that + * go beyond the limit. Return value for this routine is negative in case + * of errors and 0 in case of success. + * + * Further, the new_nr_segs pointer is updated to hold the new value + * of number of iovecs, the new_vec pointer is updated to hold the pointer + * to the new split iovec, and the size array is an array of integers holding + * the number of iovecs that straddle pvfs_bufmap_size_query(). + * The max_new_nr_segs value is computed by the caller and returned. + * (It will be (count of all iov_len/ block_size) + 1). 
+ */
+static int split_iovecs(unsigned long max_new_nr_segs,		/* IN: capacity of the output arrays */
+			unsigned long nr_segs,			/* IN: entries in original_iovec */
+			const struct iovec *original_iovec,	/* IN: not modified */
+			unsigned long *new_nr_segs,		/* OUT: entries used in *new_vec */
+			struct iovec **new_vec,			/* OUT: split iovec array */
+			unsigned long *seg_count,		/* OUT: entries used in *seg_array */
+			unsigned long **seg_array)		/* OUT: iovecs per chunk */
+{
+	unsigned long seg;
+	unsigned long count = 0;	/* bytes gathered into the current chunk */
+	unsigned long begin_seg;
+	unsigned long tmpnew_nr_segs = 0;
+	struct iovec *new_iovec = NULL;
+	struct iovec *orig_iovec;
+	unsigned long *sizes = NULL;
+	unsigned long sizes_count = 0;	/* index of the chunk being filled */
+
+	if (nr_segs <= 0 ||
+	    original_iovec == NULL ||
+	    new_nr_segs == NULL ||
+	    new_vec == NULL ||
+	    seg_count == NULL ||
+	    seg_array == NULL ||
+	    max_new_nr_segs <= 0) {
+		gossip_err("Invalid parameters to split_iovecs\n");
+		return -EINVAL;
+	}
+	*new_nr_segs = 0;
+	*new_vec = NULL;
+	*seg_count = 0;
+	*seg_array = NULL;
+	/*
+	 * Allocate a scratch array for a private copy of the caller's
+	 * iovec; it is filled by the memcpy() further down and freed
+	 * before returning.
+	 */
+	orig_iovec = kmalloc_array(nr_segs,
+				   sizeof(*orig_iovec),
+				   PVFS2_BUFMAP_GFP_FLAGS);
+	if (orig_iovec == NULL) {
+		gossip_err(
+		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
+		    (unsigned long)(nr_segs * sizeof(*orig_iovec)));
+		return -ENOMEM;
+	}
+	new_iovec = kcalloc(max_new_nr_segs,
+			    sizeof(*new_iovec),
+			    PVFS2_BUFMAP_GFP_FLAGS);
+	if (new_iovec == NULL) {
+		kfree(orig_iovec);
+		gossip_err(
+		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
+		    (unsigned long)(max_new_nr_segs * sizeof(*new_iovec)));
+		return -ENOMEM;
+	}
+	sizes = kcalloc(max_new_nr_segs,
+			sizeof(*sizes),
+			PVFS2_BUFMAP_GFP_FLAGS);
+	if (sizes == NULL) {
+		kfree(new_iovec);
+		kfree(orig_iovec);
+		gossip_err(
+		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
+		    (unsigned long)(max_new_nr_segs * sizeof(*sizes)));
+		return -ENOMEM;
+	}
+	/*
+	 * Work on the private copy: when an entry is split, the loop
+	 * below advances its iov_base and shrinks its iov_len in place,
+	 * and the caller's array is const.
+	 */
+	memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec));
+	begin_seg = 0;
+repeat:	/* re-entered after every split, restarting at the split entry */
+	for (seg = begin_seg; seg < nr_segs; seg++) {
+		if (tmpnew_nr_segs >= max_new_nr_segs ||
+		    sizes_count >= max_new_nr_segs) {
+			kfree(sizes);
+			kfree(orig_iovec);
+			kfree(new_iovec);
+			gossip_err
+			    ("split_iovecs: exceeded the index limit (%lu)\n",
+			     tmpnew_nr_segs);
+			return -EINVAL;
+		}
+		if (count + orig_iovec[seg].iov_len <
+		    pvfs_bufmap_size_query()) {	/* entry fits: copy it whole */
+			count += orig_iovec[seg].iov_len;
+			memcpy(&new_iovec[tmpnew_nr_segs],
+			       &orig_iovec[seg],
+			       sizeof(*new_iovec));
+			tmpnew_nr_segs++;
+			sizes[sizes_count]++;
+		} else {	/* entry reaches the limit: emit the part that fits */
+			new_iovec[tmpnew_nr_segs].iov_base =
+			    orig_iovec[seg].iov_base;
+			new_iovec[tmpnew_nr_segs].iov_len =
+			    (pvfs_bufmap_size_query() - count);
+			tmpnew_nr_segs++;
+			sizes[sizes_count]++;
+			sizes_count++;	/* this chunk is full, start the next one */
+			begin_seg = seg;
+			orig_iovec[seg].iov_base +=
+			    (pvfs_bufmap_size_query() - count);
+			orig_iovec[seg].iov_len -=
+			    (pvfs_bufmap_size_query() - count);
+			count = 0;
+			break;
+		}
+	}
+	if (seg != nr_segs)
+		goto repeat;
+	else
+		sizes_count++;	/* account for the final chunk */
+
+	*new_nr_segs = tmpnew_nr_segs;
+	/* new_iovec is freed by the caller */
+	*new_vec = new_iovec;
+	*seg_count = sizes_count;
+	/* seg_array is also freed by the caller */
+	*seg_array = sizes;
+	kfree(orig_iovec);
+	return 0;
+}
+/*
+ * Sum the lengths of nr_segs iovec entries into *total_count and
+ * estimate how many iovec entries are needed once they are split at
+ * multiples of pvfs_bufmap_size_query().
+ * Returns that estimate, or -EINVAL when the running byte count or a
+ * single iov_len is negative when viewed as ssize_t. *total_count is
+ * only written on success.
+ */
+static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs,
+			     ssize_t *total_count)
+{
+	unsigned long i;
+	long max_nr_iovecs;
+	ssize_t total;	/* bytes gathered since the last chunk boundary */
+	ssize_t count;	/* bytes in all entries seen so far */
+
+	total = 0;
+	count = 0;
+	max_nr_iovecs = 0;
+	for (i = 0; i < nr_segs; i++) {
+		const struct iovec *iv = &curr[i];
+
+		count += iv->iov_len;
+		if (unlikely((ssize_t) (count | iv->iov_len) < 0))
+			return -EINVAL;
+		if (total + iv->iov_len < pvfs_bufmap_size_query()) {
+			total += iv->iov_len;
+			max_nr_iovecs++;
+		} else {
+			total =
+			    (total + iv->iov_len - pvfs_bufmap_size_query());
+			max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2);
+		}
+	}
+	*total_count = count;
+	return max_nr_iovecs;
+}
+
+/*
+ * Common entry point for read/write/readv/writev
+ * This function will dispatch it to either the direct I/O
+ * or buffered I/O path
depending on the mount options and/or + * augmented/extended metadata attached to the file. + * Note: File extended attributes override any mount options. + */ +static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, + loff_t *offset, const struct iovec *iov, unsigned long nr_segs) +{ + struct inode *inode = file->f_mapping->host; + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; + ssize_t ret; + ssize_t total_count; + unsigned int to_free; + size_t count; + unsigned long seg; + unsigned long new_nr_segs = 0; + unsigned long max_new_nr_segs = 0; + unsigned long seg_count = 0; + unsigned long *seg_array = NULL; + struct iovec *iovecptr = NULL; + struct iovec *ptr = NULL; + + total_count = 0; + ret = -EINVAL; + count = 0; + to_free = 0; + + /* Compute total and max number of segments after split */ + max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count); + if (max_new_nr_segs < 0) { + gossip_lerr("%s: could not bound iovec %lu\n", + __func__, + max_new_nr_segs); + goto out; + } + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", + __func__, + handle, + (int)count); + + if (type == PVFS_IO_WRITE) { + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): proceeding with offset : %llu, " + "size %d\n", + __func__, + handle, + llu(*offset), + (int)count); + } + + if (count == 0) { + ret = 0; + goto out; + } + + /* + * if the total size of data transfer requested is greater than + * the kernel-set blocksize of PVFS2, then we split the iovecs + * such that no iovec description straddles a block size limit + */ + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: pvfs_bufmap_size:%d\n", + __func__, + pvfs_bufmap_size_query()); + + if (count > pvfs_bufmap_size_query()) { + /* + * Split up the given iovec description such that + * no iovec descriptor straddles over the block-size limitation. + * This makes us our job easier to stage the I/O. 
+ * In addition, this function will also compute an array + * with seg_count entries that will store the number of + * segments that straddle the block-size boundaries. + */ + ret = split_iovecs(max_new_nr_segs, /* IN */ + nr_segs, /* IN */ + iov, /* IN */ + &new_nr_segs, /* OUT */ + &iovecptr, /* OUT */ + &seg_count, /* OUT */ + &seg_array); /* OUT */ + if (ret < 0) { + gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n", + __func__, + ret); + goto out; + } + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: Splitting iovecs from %lu to %lu" + " [max_new %lu]\n", + __func__, + nr_segs, + new_nr_segs, + max_new_nr_segs); + /* We must free seg_array and iovecptr */ + to_free = 1; + } else { + new_nr_segs = nr_segs; + /* use the given iovec description */ + iovecptr = (struct iovec *)iov; + /* There is only 1 element in the seg_array */ + seg_count = 1; + /* and its value is the number of segments passed in */ + seg_array = &nr_segs; + /* We dont have to free up anything */ + to_free = 0; + } + ptr = iovecptr; + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU) %zd@%llu\n", + __func__, + handle, + count, + llu(*offset)); + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n", + __func__, + handle, + new_nr_segs, seg_count); + +/* PVFS2_KERNEL_DEBUG is a CFLAGS define. */ +#ifdef PVFS2_KERNEL_DEBUG + for (seg = 0; seg < new_nr_segs; seg++) + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: %d) %p to %p [%d bytes]\n", + __func__, + (int)seg + 1, + iovecptr[seg].iov_base, + iovecptr[seg].iov_base + iovecptr[seg].iov_len, + (int)iovecptr[seg].iov_len); + for (seg = 0; seg < seg_count; seg++) + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: %zd) %lu\n", + __func__, + seg + 1, + seg_array[seg]); +#endif + seg = 0; + while (total_count < count) { + size_t each_count; + size_t amt_complete; + + /* how much to transfer in this loop iteration */ + each_count = + (((count - total_count) > pvfs_bufmap_size_query()) ? 
+ pvfs_bufmap_size_query() : + (count - total_count)); + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): size of each_count(%d)\n", + __func__, + handle, + (int)each_count); + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): BEFORE wait_for_io: offset is %d\n", + __func__, + handle, + (int)*offset); + + ret = wait_for_direct_io(type, inode, offset, ptr, + seg_array[seg], each_count, 0, 1); + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): return from wait_for_io:%d\n", + __func__, + handle, + (int)ret); + + if (ret < 0) + goto out; + + /* advance the iovec pointer */ + ptr += seg_array[seg]; + seg++; + *offset += ret; + total_count += ret; + amt_complete = ret; + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): AFTER wait_for_io: offset is %d\n", + __func__, + handle, + (int)*offset); + + /* + * if we got a short I/O operations, + * fall out and return what we got so far + */ + if (amt_complete < each_count) + break; + } /*end while */ + + if (total_count > 0) + ret = total_count; +out: + if (to_free) { + kfree(iovecptr); + kfree(seg_array); + } + if (ret > 0) { + if (type == PVFS_IO_READ) { + file_accessed(file); + } else { + SetMtimeFlag(pvfs2_inode); + inode->i_mtime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + } + } + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): Value(%d) returned.\n", + __func__, + handle, + (int)ret); + + return ret; +} + +/* + * Read data from a specified offset in a file (referenced by inode). + * Data may be placed either in a user or kernel buffer. 
+ */ +ssize_t pvfs2_inode_read(struct inode *inode, + char __user *buf, + size_t count, + loff_t *offset, + loff_t readahead_size) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + size_t bufmap_size; + struct iovec vec; + ssize_t ret = -EINVAL; + + g_pvfs2_stats.reads++; + + vec.iov_base = buf; + vec.iov_len = count; + + bufmap_size = pvfs_bufmap_size_query(); + if (count > bufmap_size) { + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: count is too large (%zd/%zd)!\n", + __func__, count, bufmap_size); + return -EINVAL; + } + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU) %zd@%llu\n", + __func__, + &pvfs2_inode->refn.khandle, + count, + llu(*offset)); + + ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, &vec, 1, + count, readahead_size, 0); + if (ret > 0) + *offset += ret; + + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): Value(%zd) returned.\n", + __func__, + &pvfs2_inode->refn.khandle, + ret); + + return ret; +} + +static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + loff_t pos = *(&iocb->ki_pos); + ssize_t rc = 0; + unsigned long nr_segs = iter->nr_segs; + + BUG_ON(iocb->private); + + gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_file_read_iter\n"); + + g_pvfs2_stats.reads++; + + rc = do_readv_writev(PVFS_IO_READ, + file, + &pos, + iter->iov, + nr_segs); + iocb->ki_pos = pos; + + return rc; +} + +static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + loff_t pos = *(&iocb->ki_pos); + unsigned long nr_segs = iter->nr_segs; + ssize_t rc; + + BUG_ON(iocb->private); + + gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_file_write_iter\n"); + + mutex_lock(&file->f_mapping->host->i_mutex); + + /* Make sure generic_write_checks sees an up to date inode size. 
*/ + if (file->f_flags & O_APPEND) { + rc = pvfs2_inode_getattr(file->f_mapping->host, + PVFS_ATTR_SYS_SIZE); + if (rc) { + gossip_err("%s: pvfs2_inode_getattr failed, rc:%zd:.\n", + __func__, rc); + goto out; + } + } + + if (file->f_pos > i_size_read(file->f_mapping->host)) + pvfs2_i_size_write(file->f_mapping->host, file->f_pos); + + rc = generic_write_checks(iocb, iter); + + if (rc <= 0) { + gossip_err("%s: generic_write_checks failed, rc:%zd:.\n", + __func__, rc); + goto out; + } + + rc = do_readv_writev(PVFS_IO_WRITE, + file, + &pos, + iter->iov, + nr_segs); + if (rc < 0) { + gossip_err("%s: do_readv_writev failed, rc:%zd:.\n", + __func__, rc); + goto out; + } + + iocb->ki_pos = pos; + g_pvfs2_stats.writes++; + +out: + + mutex_unlock(&file->f_mapping->host->i_mutex); + return rc; +} + +/* + * Perform a miscellaneous operation on a file. + */ +long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret = -ENOTTY; + __u64 val = 0; + unsigned long uval; + + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_ioctl: called with cmd %d\n", + cmd); + + /* + * we understand some general ioctls on files, such as the immutable + * and append flags + */ + if (cmd == FS_IOC_GETFLAGS) { + val = 0; + ret = pvfs2_xattr_get_default(file->f_path.dentry, + "user.pvfs2.meta_hint", + &val, + sizeof(val), + 0); + if (ret < 0 && ret != -ENODATA) + return ret; + else if (ret == -ENODATA) + val = 0; + uval = val; + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_ioctl: FS_IOC_GETFLAGS: %llu\n", + (unsigned long long)uval); + return put_user(uval, (int __user *)arg); + } else if (cmd == FS_IOC_SETFLAGS) { + ret = 0; + if (get_user(uval, (int __user *)arg)) + return -EFAULT; + /* + * PVFS_MIRROR_FL is set internally when the mirroring mode + * is turned on for a file. The user is not allowed to turn + * on this bit, but the bit is present if the user first gets + * the flags and then updates the flags with some new + * settings. So, we ignore it in the following edit. bligon. 
+ */ + if ((uval & ~PVFS_MIRROR_FL) & + (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) { + gossip_err("pvfs2_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); + return -EINVAL; + } + val = uval; + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_ioctl: FS_IOC_SETFLAGS: %llu\n", + (unsigned long long)val); + ret = pvfs2_xattr_set_default(file->f_path.dentry, + "user.pvfs2.meta_hint", + &val, + sizeof(val), + 0, + 0); + } + + return ret; +} + +/* + * Memory map a region of a file. + */ +static int pvfs2_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_file_mmap: called on %s\n", + (file ? + (char *)file->f_path.dentry->d_name.name : + (char *)"Unknown")); + + /* set the sequential readahead hint */ + vma->vm_flags |= VM_SEQ_READ; + vma->vm_flags &= ~VM_RAND_READ; + return generic_file_mmap(file, vma); +} + +#define mapping_nrpages(idata) ((idata)->nrpages) + +/* + * Called to notify the module that there are no more references to + * this file (i.e. no processes have it open). + * + * \note Not called when each file is closed. + */ +int pvfs2_file_release(struct inode *inode, struct file *file) +{ + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_file_release: called on %s\n", + file->f_path.dentry->d_name.name); + + pvfs2_flush_inode(inode); + + /* + remove all associated inode pages from the page cache and mmap + readahead cache (if any); this forces an expensive refresh of + data for the next caller of mmap (or 'get_block' accesses) + */ + if (file->f_path.dentry->d_inode && + file->f_path.dentry->d_inode->i_mapping && + mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) + truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping, + 0); + return 0; +} + +/* + * Push all data for a specific file onto permanent storage. 
+ */ +int pvfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret = -EINVAL; + struct pvfs2_inode_s *pvfs2_inode = + PVFS2_I(file->f_path.dentry->d_inode); + struct pvfs2_kernel_op_s *new_op = NULL; + + /* required call */ + filemap_write_and_wait_range(file->f_mapping, start, end); + + new_op = op_alloc(PVFS2_VFS_OP_FSYNC); + if (!new_op) + return -ENOMEM; + new_op->upcall.req.fsync.refn = pvfs2_inode->refn; + + ret = service_operation(new_op, + "pvfs2_fsync", + get_interruptible_flag(file->f_path.dentry->d_inode)); + + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_fsync got return value of %d\n", + ret); + + op_release(new_op); + + pvfs2_flush_inode(file->f_path.dentry->d_inode); + return ret; +} + +/* + * Change the file pointer position for an instance of an open file. + * + * \note If .llseek is overriden, we must acquire lock as described in + * Documentation/filesystems/Locking. + * + * Future upgrade could support SEEK_DATA and SEEK_HOLE but would + * require much changes to the FS + */ +loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) +{ + int ret = -EINVAL; + struct inode *inode = file->f_path.dentry->d_inode; + + if (!inode) { + gossip_err("pvfs2_file_llseek: invalid inode (NULL)\n"); + return ret; + } + + if (origin == PVFS2_SEEK_END) { + /* + * revalidate the inode's file size. + * NOTE: We are only interested in file size here, + * so we set mask accordingly. 
+ */ + ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_SIZE); + if (ret) { + gossip_debug(GOSSIP_FILE_DEBUG, + "%s:%s:%d calling make bad inode\n", + __FILE__, + __func__, + __LINE__); + pvfs2_make_bad_inode(inode); + return ret; + } + } + + gossip_debug(GOSSIP_FILE_DEBUG, + "pvfs2_file_llseek: offset is %ld | origin is %d | " + "inode size is %lu\n", + (long)offset, + origin, + (unsigned long)file->f_path.dentry->d_inode->i_size); + + return generic_file_llseek(file, offset, origin); +} + +/* + * Support local locks (locks that only this kernel knows about) + * if Orangefs was mounted -o local_lock. + */ +int pvfs2_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + int rc = -ENOLCK; + + if (PVFS2_SB(filp->f_inode->i_sb)->flags & PVFS2_OPT_LOCAL_LOCK) { + if (cmd == F_GETLK) { + rc = 0; + posix_test_lock(filp, fl); + } else { + rc = posix_lock_file(filp, fl, NULL); + } + } + + return rc; +} + +/** PVFS2 implementation of VFS file operations */ +const struct file_operations pvfs2_file_operations = { + .llseek = pvfs2_file_llseek, + .read_iter = pvfs2_file_read_iter, + .write_iter = pvfs2_file_write_iter, + .lock = pvfs2_lock, + .unlocked_ioctl = pvfs2_ioctl, + .mmap = pvfs2_file_mmap, + .open = generic_file_open, + .release = pvfs2_file_release, + .fsync = pvfs2_fsync, +}; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c new file mode 100644 index 000000000000..feda00fcdd7d --- /dev/null +++ b/fs/orangefs/inode.c @@ -0,0 +1,469 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * Linux VFS inode operations. 
+ */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +static int read_one_page(struct page *page) +{ + void *page_data; + int ret; + int max_block; + ssize_t bytes_read = 0; + struct inode *inode = page->mapping->host; + const __u32 blocksize = PAGE_CACHE_SIZE; /* inode->i_blksize */ + const __u32 blockbits = PAGE_CACHE_SHIFT; /* inode->i_blkbits */ + + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_readpage called with page %p\n", + page); + page_data = pvfs2_kmap(page); + + max_block = ((inode->i_size / blocksize) + 1); + + if (page->index < max_block) { + loff_t blockptr_offset = (((loff_t) page->index) << blockbits); + + bytes_read = pvfs2_inode_read(inode, + page_data, + blocksize, + &blockptr_offset, + inode->i_size); + } + /* only zero remaining unread portions of the page data */ + if (bytes_read > 0) + memset(page_data + bytes_read, 0, blocksize - bytes_read); + else + memset(page_data, 0, blocksize); + /* takes care of potential aliasing */ + flush_dcache_page(page); + if (bytes_read < 0) { + ret = bytes_read; + SetPageError(page); + } else { + SetPageUptodate(page); + if (PageError(page)) + ClearPageError(page); + ret = 0; + } + pvfs2_kunmap(page); + /* unlock the page after the ->readpage() routine completes */ + unlock_page(page); + return ret; +} + +static int pvfs2_readpage(struct file *file, struct page *page) +{ + return read_one_page(page); +} + +static int pvfs2_readpages(struct file *file, + struct address_space *mapping, + struct list_head *pages, + unsigned nr_pages) +{ + int page_idx; + int ret; + + gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_readpages called\n"); + + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page; + + page = list_entry(pages->prev, struct page, lru); + list_del(&page->lru); + if (!add_to_page_cache(page, + mapping, + page->index, + GFP_KERNEL)) { + ret = read_one_page(page); + gossip_debug(GOSSIP_INODE_DEBUG, + "failure adding page to cache, read_one_page returned: %d\n", + 
ret); + } else { + page_cache_release(page); + } + } + BUG_ON(!list_empty(pages)); + return 0; +} + +static void pvfs2_invalidatepage(struct page *page, + unsigned int offset, + unsigned int length) +{ + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_invalidatepage called on page %p " + "(offset is %u)\n", + page, + offset); + + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + return; + +} + +static int pvfs2_releasepage(struct page *page, gfp_t foo) +{ + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_releasepage called on page %p\n", + page); + return 0; +} + +/* + * Having a direct_IO entry point in the address_space_operations + * struct causes the kernel to allows us to use O_DIRECT on + * open. Nothing will ever call this thing, but in the future we + * will need to be able to use O_DIRECT on open in order to support + * AIO. Modeled after NFS, they do this too. + */ +/* +static ssize_t pvfs2_direct_IO(int rw, + struct kiocb *iocb, + struct iov_iter *iter, + loff_t offset) +{ + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_direct_IO: %s\n", + iocb->ki_filp->f_path.dentry->d_name.name); + + return -EINVAL; +} +*/ + +struct backing_dev_info pvfs2_backing_dev_info = { + .name = "pvfs2", + .ra_pages = 0, + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, +}; + +/** PVFS2 implementation of address space operations */ +const struct address_space_operations pvfs2_address_operations = { + .readpage = pvfs2_readpage, + .readpages = pvfs2_readpages, + .invalidatepage = pvfs2_invalidatepage, + .releasepage = pvfs2_releasepage, +/* .direct_IO = pvfs2_direct_IO */ +}; + +static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct pvfs2_kernel_op_s *new_op; + loff_t orig_size = i_size_read(inode); + int ret = -EINVAL; + + gossip_debug(GOSSIP_INODE_DEBUG, + "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n", + __func__, + get_khandle_from_ino(inode), + &pvfs2_inode->refn.khandle, + 
pvfs2_inode->refn.fs_id, + iattr->ia_size); + + truncate_setsize(inode, iattr->ia_size); + + new_op = op_alloc(PVFS2_VFS_OP_TRUNCATE); + if (!new_op) + return -ENOMEM; + + new_op->upcall.req.truncate.refn = pvfs2_inode->refn; + new_op->upcall.req.truncate.size = (__s64) iattr->ia_size; + + ret = service_operation(new_op, __func__, + get_interruptible_flag(inode)); + + /* + * the truncate has no downcall members to retrieve, but + * the status value tells us if it went through ok or not + */ + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2: pvfs2_truncate got return value of %d\n", + ret); + + op_release(new_op); + + if (ret != 0) + return ret; + + /* + * Only change the c/mtime if we are changing the size or we are + * explicitly asked to change it. This handles the semantic difference + * between truncate() and ftruncate() as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a + * special case where we need to update the times despite not having + * these flags set. For all other operations the VFS set these flags + * explicitly if it wants a timestamp update. + */ + if (orig_size != i_size_read(inode) && + !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; + } + + return ret; +} + +/* + * Change attributes of an object referenced by dentry. 
+ */ +int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int ret = -EINVAL; + struct inode *inode = dentry->d_inode; + + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_setattr: called on %s\n", + dentry->d_name.name); + + ret = inode_change_ok(inode, iattr); + if (ret) + goto out; + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + ret = pvfs2_setattr_size(inode, iattr); + if (ret) + goto out; + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + + ret = pvfs2_inode_setattr(inode, iattr); + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_setattr: inode_setattr returned %d\n", + ret); + + if (!ret && (iattr->ia_valid & ATTR_MODE)) + /* change mod on a file that has ACLs */ + ret = posix_acl_chmod(inode, inode->i_mode); + +out: + gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: returning %d\n", ret); + return ret; +} + +/* + * Obtain attributes of an object given a dentry + */ +int pvfs2_getattr(struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *kstat) +{ + int ret = -ENOENT; + struct inode *inode = dentry->d_inode; + struct pvfs2_inode_s *pvfs2_inode = NULL; + + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_getattr: called on %s\n", + dentry->d_name.name); + + /* + * Similar to the above comment, a getattr also expects that all + * fields/attributes of the inode would be refreshed. So again, we + * dont have too much of a choice but refresh all the attributes. 
+ */ + ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + if (ret == 0) { + generic_fillattr(inode, kstat); + /* override block size reported to stat */ + pvfs2_inode = PVFS2_I(inode); + kstat->blksize = pvfs2_inode->blksize; + } else { + /* assume an I/O error and flag inode as bad */ + gossip_debug(GOSSIP_INODE_DEBUG, + "%s:%s:%d calling make bad inode\n", + __FILE__, + __func__, + __LINE__); + pvfs2_make_bad_inode(inode); + } + return ret; +} + +/* PVFS2 implementation of VFS inode operations for files */ +struct inode_operations pvfs2_file_inode_operations = { + .get_acl = pvfs2_get_acl, + .set_acl = pvfs2_set_acl, + .setattr = pvfs2_setattr, + .getattr = pvfs2_getattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = pvfs2_listxattr, + .removexattr = generic_removexattr, +}; + +static int pvfs2_init_iops(struct inode *inode) +{ + inode->i_mapping->a_ops = &pvfs2_address_operations; + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &pvfs2_file_inode_operations; + inode->i_fop = &pvfs2_file_operations; + inode->i_blkbits = PAGE_CACHE_SHIFT; + break; + case S_IFLNK: + inode->i_op = &pvfs2_symlink_inode_operations; + break; + case S_IFDIR: + inode->i_op = &pvfs2_dir_inode_operations; + inode->i_fop = &pvfs2_dir_operations; + break; + default: + gossip_debug(GOSSIP_INODE_DEBUG, + "%s: unsupported mode\n", + __func__); + return -EINVAL; + } + + return 0; +} + +/* + * Given a PVFS2 object identifier (fsid, handle), convert it into a ino_t type + * that will be used as a hash-index from where the handle will + * be searched for in the VFS hash table of inodes. + */ +static inline ino_t pvfs2_handle_hash(struct pvfs2_object_kref *ref) +{ + if (!ref) + return 0; + return pvfs2_khandle_to_ino(&(ref->khandle)); +} + +/* + * Called to set up an inode from iget5_locked. 
+ */ +static int pvfs2_set_inode(struct inode *inode, void *data) +{ + struct pvfs2_object_kref *ref = (struct pvfs2_object_kref *) data; + struct pvfs2_inode_s *pvfs2_inode = NULL; + + /* Make sure that we have sane parameters */ + if (!data || !inode) + return 0; + pvfs2_inode = PVFS2_I(inode); + if (!pvfs2_inode) + return 0; + pvfs2_inode->refn.fs_id = ref->fs_id; + pvfs2_inode->refn.khandle = ref->khandle; + return 0; +} + +/* + * Called to determine if handles match. + */ +static int pvfs2_test_inode(struct inode *inode, void *data) +{ + struct pvfs2_object_kref *ref = (struct pvfs2_object_kref *) data; + struct pvfs2_inode_s *pvfs2_inode = NULL; + + pvfs2_inode = PVFS2_I(inode); + return (!PVFS_khandle_cmp(&(pvfs2_inode->refn.khandle), &(ref->khandle)) + && pvfs2_inode->refn.fs_id == ref->fs_id); +} + +/* + * Front-end to lookup the inode-cache maintained by the VFS using the PVFS2 + * file handle. + * + * @sb: the file system super block instance. + * @ref: The PVFS2 object for which we are trying to locate an inode structure. + */ +struct inode *pvfs2_iget(struct super_block *sb, struct pvfs2_object_kref *ref) +{ + struct inode *inode = NULL; + unsigned long hash; + int error; + + hash = pvfs2_handle_hash(ref); + inode = iget5_locked(sb, hash, pvfs2_test_inode, pvfs2_set_inode, ref); + if (!inode || !(inode->i_state & I_NEW)) + return inode; + + error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + if (error) { + iget_failed(inode); + return ERR_PTR(error); + } + + inode->i_ino = hash; /* needed for stat etc */ + pvfs2_init_iops(inode); + unlock_new_inode(inode); + + gossip_debug(GOSSIP_INODE_DEBUG, + "iget handle %pU, fsid %d hash %ld i_ino %lu\n", + &ref->khandle, + ref->fs_id, + hash, + inode->i_ino); + + return inode; +} + +/* + * Allocate an inode for a newly created file and insert it into the inode hash. 
+ */ +struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir, + int mode, dev_t dev, struct pvfs2_object_kref *ref) +{ + unsigned long hash = pvfs2_handle_hash(ref); + struct inode *inode; + int error; + + gossip_debug(GOSSIP_INODE_DEBUG, + "pvfs2_get_custom_inode_common: called\n" + "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n", + sb, + MAJOR(dev), + MINOR(dev), + mode); + + inode = new_inode(sb); + if (!inode) + return NULL; + + pvfs2_set_inode(inode, ref); + inode->i_ino = hash; /* needed for stat etc */ + + error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + if (error) + goto out_iput; + + pvfs2_init_iops(inode); + + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_size = PAGE_CACHE_SIZE; + inode->i_rdev = dev; + + error = insert_inode_locked4(inode, hash, pvfs2_test_inode, ref); + if (error < 0) + goto out_iput; + + gossip_debug(GOSSIP_INODE_DEBUG, + "Initializing ACL's for inode %pU\n", + get_khandle_from_ino(inode)); + pvfs2_init_acl(inode, dir); + return inode; + +out_iput: + iput(inode); + return ERR_PTR(error); +} From 274dcf55bd4ab12af1cc1d3b77416285bef8ebf4 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 17 Jul 2015 10:38:13 -0400 Subject: [PATCH 003/174] Orangefs: kernel client part 3 Signed-off-by: Mike Marshall --- fs/orangefs/namei.c | 473 ++++++++++++++++++ fs/orangefs/pvfs2-bufmap.c | 970 ++++++++++++++++++++++++++++++++++++ fs/orangefs/pvfs2-cache.c | 260 ++++++++++ fs/orangefs/pvfs2-debugfs.c | 458 +++++++++++++++++ fs/orangefs/pvfs2-mod.c | 316 ++++++++++++ 5 files changed, 2477 insertions(+) create mode 100644 fs/orangefs/namei.c create mode 100644 fs/orangefs/pvfs2-bufmap.c create mode 100644 fs/orangefs/pvfs2-cache.c create mode 100644 fs/orangefs/pvfs2-debugfs.c create mode 100644 fs/orangefs/pvfs2-mod.c diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c new file mode 100644 
index 000000000000..747fe6a690af
--- /dev/null
+++ b/fs/orangefs/namei.c
@@ -0,0 +1,473 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Linux VFS namei operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+
+/*
+ * Get a newly allocated inode to go with a negative dentry.
+ *
+ * Sends a PVFS2_VFS_OP_CREATE upcall carrying the parent's refn, default
+ * attributes for a PVFS_TYPE_METAFILE and the entry name.  On success the
+ * refn returned in the downcall is handed to pvfs2_new_inode(), the dentry
+ * is instantiated and the parent's mtime/ctime are refreshed.
+ *
+ * The "exclusive" argument is not consulted by this function.
+ *
+ * Returns 0 on success, -ENOMEM if no op could be allocated, otherwise the
+ * negative value reported by service_operation() or pvfs2_new_inode().
+ */
+static int pvfs2_create(struct inode *dir,
+			struct dentry *dentry,
+			umode_t mode,
+			bool exclusive)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op_s *new_op;
+	struct inode *inode;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+	new_op = op_alloc(PVFS2_VFS_OP_CREATE);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.create.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.create.attributes,
+			       PVFS_TYPE_METAFILE, mode);
+
+	/*
+	 * NOTE(review): strncpy() does not terminate d_name when the source
+	 * is PVFS2_NAME_LEN bytes or longer; unlike pvfs2_lookup() no length
+	 * check is made here -- confirm the caller bounds the name length.
+	 */
+	strncpy(new_op->upcall.req.create.d_name,
+		dentry->d_name.name, PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	/* logged before ret is examined, so this also runs on failure */
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Create Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.create.refn.khandle,
+		     new_op->downcall.resp.create.refn.fs_id, ret);
+
+	if (ret < 0) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, ret);
+		goto out;
+	}
+
+	inode = pvfs2_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
+				&new_op->downcall.resp.create.refn);
+	if (IS_ERR(inode)) {
+		gossip_err("*** Failed to allocate pvfs2 file inode\n");
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Assigned file inode new number of %pU\n",
+		     get_khandle_from_ino(inode));
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Inode (Regular File) %pU -> %s\n",
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	/* the parent directory changed: flag it and refresh its times */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	ret = 0;
+out:
+	/* single exit: the op is released on success and on failure */
+	op_release(new_op);
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: returning %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Attempt to resolve an object name (dentry->d_name), parent handle, and
+ * fsid into a handle for the object.
+ *
+ * Returns ERR_PTR(-ENAMETOOLONG) when the name does not fit in
+ * PVFS2_NAME_LEN - 1 bytes and ERR_PTR(-ENOMEM) when no op can be
+ * allocated.  When the upcall reports -ENOENT a negative dentry is added
+ * and NULL is returned; any other upcall or pvfs2_iget() failure is
+ * returned as an error pointer.  On success the result of
+ * d_splice_alias() is returned.
+ */
+static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry,
+				   unsigned int flags)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op_s *new_op;
+	struct inode *inode;
+	struct dentry *res;
+	int ret = -EINVAL;
+
+	/*
+	 * in theory we could skip a lookup here (if the intent is to
+	 * create) in order to avoid a potentially failed lookup, but
+	 * leaving it in can skip a valid lookup and try to create a file
+	 * that already exists (e.g. the vfs already handles checking for
+	 * -EEXIST on O_EXCL opens, which is broken if we skip this lookup
+	 * in the create path)
+	 */
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
+		     __func__, dentry->d_name.name);
+
+	/* leave room for the terminating NUL in the upcall's d_name */
+	if (dentry->d_name.len > (PVFS2_NAME_LEN - 1))
+		return ERR_PTR(-ENAMETOOLONG);
+
+	new_op = op_alloc(PVFS2_VFS_OP_LOOKUP);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	new_op->upcall.req.lookup.sym_follow = flags & LOOKUP_FOLLOW;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     &parent->refn.khandle);
+	new_op->upcall.req.lookup.parent_refn = parent->refn;
+
+	strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: doing lookup on %s under %pU,%d (follow=%s)\n",
+		     __func__,
+		     new_op->upcall.req.lookup.d_name,
+		     &new_op->upcall.req.lookup.parent_refn.khandle,
+		     new_op->upcall.req.lookup.parent_refn.fs_id,
+		     ((new_op->upcall.req.lookup.sym_follow ==
+		       PVFS2_LOOKUP_LINK_FOLLOW) ?
+		      "yes" : "no"));
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup Got %pU, fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.lookup.refn.khandle,
+		     new_op->downcall.resp.lookup.refn.fs_id,
+		     ret);
+
+	if (ret < 0) {
+		if (ret == -ENOENT) {
+			/*
+			 * if no inode was found, add a negative dentry to
+			 * dcache anyway; if we don't, we don't hold expected
+			 * lookup semantics and we most noticeably break
+			 * during directory renames.
+			 *
+			 * however, if the operation failed or exited, do not
+			 * add the dentry (e.g. in the case that a touch is
+			 * issued on a file that already exists that was
+			 * interrupted during this lookup -- no need to add
+			 * another negative dentry for an existing file)
+			 */
+
+			gossip_debug(GOSSIP_NAME_DEBUG,
+				     "pvfs2_lookup: Adding *negative* dentry "
+				     "%p for %s\n",
+				     dentry,
+				     dentry->d_name.name);
+
+			d_add(dentry, NULL);
+			res = NULL;
+			goto out;
+		}
+
+		/* must be a non-recoverable error */
+		res = ERR_PTR(ret);
+		goto out;
+	}
+
+	inode = pvfs2_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+	if (IS_ERR(inode)) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			"error %ld from iget\n", PTR_ERR(inode));
+		res = ERR_CAST(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s:%s:%d "
+		     "Found good inode [%lu] with count [%d]\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     inode->i_ino,
+		     (int)atomic_read(&inode->i_count));
+
+	/* update dentry/inode pair into dcache */
+	res = d_splice_alias(inode, dentry);
+
+	/*
+	 * NOTE(review): inode is read again after being handed to
+	 * d_splice_alias(); confirm it is still guaranteed to be alive here.
+	 */
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup success (inode ct = %d)\n",
+		     (int)atomic_read(&inode->i_count));
+out:
+	op_release(new_op);
+	return res;
+}
+
+/*
+ * Remove the entry dentry->d_name from directory dir by sending a
+ * PVFS2_VFS_OP_REMOVE upcall.  On success the target inode's link count is
+ * dropped and the parent's mtime/ctime are refreshed.
+ *
+ * return 0 on success; non-zero otherwise
+ */
+static int pvfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op_s *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: called on %s\n"
+		     " (inode %pU): Parent is %pU | fs_id %d\n",
+		     __func__,
+		     dentry->d_name.name,
+		     get_khandle_from_ino(inode),
+		     &parent->refn.khandle,
+		     parent->refn.fs_id);
+
+	new_op = op_alloc(PVFS2_VFS_OP_REMOVE);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.remove.parent_refn = parent->refn;
+	strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op, "pvfs2_unlink",
+				get_interruptible_flag(inode));
+
+	/* the op is released whether or not the request succeeded */
+	op_release(new_op);
+
+	if (!ret) {
+		drop_nlink(inode);
+
+		SetMtimeFlag(parent);
+		dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+		mark_inode_dirty_sync(dir);
+	}
+	return ret;
+}
+
+/*
+ * pvfs2_link() is only implemented here to make sure that we return a
+ * reasonable error code (the kernel will return a misleading EPERM
+ * otherwise).  PVFS2 does not support hard links.
+ */
+static int pvfs2_link(struct dentry *old_dentry,
+		      struct inode *dir,
+		      struct dentry *dentry)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * pvfs2_mknod() is only implemented here to make sure that we return a
+ * reasonable error code (the kernel will return a misleading EPERM
+ * otherwise).  PVFS2 does not support special files such as fifos or devices.
+ */ +static int pvfs2_mknod(struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) +{ + return -EOPNOTSUPP; +} + +static int pvfs2_symlink(struct inode *dir, + struct dentry *dentry, + const char *symname) +{ + struct pvfs2_inode_s *parent = PVFS2_I(dir); + struct pvfs2_kernel_op_s *new_op; + struct inode *inode; + int mode = 755; + int ret; + + gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__); + + if (!symname) + return -EINVAL; + + new_op = op_alloc(PVFS2_VFS_OP_SYMLINK); + if (!new_op) + return -ENOMEM; + + new_op->upcall.req.sym.parent_refn = parent->refn; + + fill_default_sys_attrs(new_op->upcall.req.sym.attributes, + PVFS_TYPE_SYMLINK, + mode); + + strncpy(new_op->upcall.req.sym.entry_name, + dentry->d_name.name, + PVFS2_NAME_LEN); + strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN); + + ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); + + gossip_debug(GOSSIP_NAME_DEBUG, + "Symlink Got PVFS2 handle %pU on fsid %d (ret=%d)\n", + &new_op->downcall.resp.sym.refn.khandle, + new_op->downcall.resp.sym.refn.fs_id, ret); + + if (ret < 0) { + gossip_debug(GOSSIP_NAME_DEBUG, + "%s: failed with error code %d\n", + __func__, ret); + goto out; + } + + inode = pvfs2_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, + &new_op->downcall.resp.sym.refn); + if (IS_ERR(inode)) { + gossip_err + ("*** Failed to allocate pvfs2 symlink inode\n"); + ret = PTR_ERR(inode); + goto out; + } + + gossip_debug(GOSSIP_NAME_DEBUG, + "Assigned symlink inode new number of %pU\n", + get_khandle_from_ino(inode)); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + gossip_debug(GOSSIP_NAME_DEBUG, + "Inode (Symlink) %pU -> %s\n", + get_khandle_from_ino(inode), + dentry->d_name.name); + + SetMtimeFlag(parent); + dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + mark_inode_dirty_sync(dir); + ret = 0; +out: + op_release(new_op); + return ret; +} + +static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, 
umode_t mode) +{ + struct pvfs2_inode_s *parent = PVFS2_I(dir); + struct pvfs2_kernel_op_s *new_op; + struct inode *inode; + int ret; + + new_op = op_alloc(PVFS2_VFS_OP_MKDIR); + if (!new_op) + return -ENOMEM; + + new_op->upcall.req.mkdir.parent_refn = parent->refn; + + fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes, + PVFS_TYPE_DIRECTORY, mode); + + strncpy(new_op->upcall.req.mkdir.d_name, + dentry->d_name.name, PVFS2_NAME_LEN); + + ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); + + gossip_debug(GOSSIP_NAME_DEBUG, + "Mkdir Got PVFS2 handle %pU on fsid %d\n", + &new_op->downcall.resp.mkdir.refn.khandle, + new_op->downcall.resp.mkdir.refn.fs_id); + + if (ret < 0) { + gossip_debug(GOSSIP_NAME_DEBUG, + "%s: failed with error code %d\n", + __func__, ret); + goto out; + } + + inode = pvfs2_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, + &new_op->downcall.resp.mkdir.refn); + if (IS_ERR(inode)) { + gossip_err("*** Failed to allocate pvfs2 dir inode\n"); + ret = PTR_ERR(inode); + goto out; + } + + gossip_debug(GOSSIP_NAME_DEBUG, + "Assigned dir inode new number of %pU\n", + get_khandle_from_ino(inode)); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + gossip_debug(GOSSIP_NAME_DEBUG, + "Inode (Directory) %pU -> %s\n", + get_khandle_from_ino(inode), + dentry->d_name.name); + + /* + * NOTE: we have no good way to keep nlink consistent for directories + * across clients; keep constant at 1. 
+ */ + SetMtimeFlag(parent); + dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + mark_inode_dirty_sync(dir); +out: + op_release(new_op); + return ret; +} + +static int pvfs2_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry) +{ + struct pvfs2_kernel_op_s *new_op; + int ret; + + gossip_debug(GOSSIP_NAME_DEBUG, + "pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n", + old_dentry->d_parent->d_name.name, + old_dentry->d_name.name, + new_dentry->d_parent->d_name.name, + new_dentry->d_name.name, + d_count(new_dentry)); + + new_op = op_alloc(PVFS2_VFS_OP_RENAME); + if (!new_op) + return -EINVAL; + + new_op->upcall.req.rename.old_parent_refn = PVFS2_I(old_dir)->refn; + new_op->upcall.req.rename.new_parent_refn = PVFS2_I(new_dir)->refn; + + strncpy(new_op->upcall.req.rename.d_old_name, + old_dentry->d_name.name, + PVFS2_NAME_LEN); + strncpy(new_op->upcall.req.rename.d_new_name, + new_dentry->d_name.name, + PVFS2_NAME_LEN); + + ret = service_operation(new_op, + "pvfs2_rename", + get_interruptible_flag(old_dentry->d_inode)); + + gossip_debug(GOSSIP_NAME_DEBUG, + "pvfs2_rename: got downcall status %d\n", + ret); + + if (new_dentry->d_inode) + new_dentry->d_inode->i_ctime = CURRENT_TIME; + + op_release(new_op); + return ret; +} + +/* PVFS2 implementation of VFS inode operations for directories */ +struct inode_operations pvfs2_dir_inode_operations = { + .lookup = pvfs2_lookup, + .get_acl = pvfs2_get_acl, + .set_acl = pvfs2_set_acl, + .create = pvfs2_create, + .link = pvfs2_link, + .unlink = pvfs2_unlink, + .symlink = pvfs2_symlink, + .mkdir = pvfs2_mkdir, + .rmdir = pvfs2_unlink, + .mknod = pvfs2_mknod, + .rename = pvfs2_rename, + .setattr = pvfs2_setattr, + .getattr = pvfs2_getattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = pvfs2_listxattr, +}; diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c new file mode 100644 
index 000000000000..aa14c37d0216 --- /dev/null +++ b/fs/orangefs/pvfs2-bufmap.c @@ -0,0 +1,970 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq); + +struct pvfs2_bufmap { + atomic_t refcnt; + + int desc_size; + int desc_shift; + int desc_count; + int total_size; + int page_count; + + struct page **page_array; + struct pvfs_bufmap_desc *desc_array; + + /* array to track usage of buffer descriptors */ + int *buffer_index_array; + spinlock_t buffer_index_lock; + + /* array to track usage of buffer descriptors for readdir */ + int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT]; + spinlock_t readdir_index_lock; +} *__pvfs2_bufmap; + +static DEFINE_SPINLOCK(pvfs2_bufmap_lock); + +static void +pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap) +{ + int i; + + for (i = 0; i < bufmap->page_count; i++) + page_cache_release(bufmap->page_array[i]); +} + +static void +pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap) +{ + kfree(bufmap->page_array); + kfree(bufmap->desc_array); + kfree(bufmap->buffer_index_array); + kfree(bufmap); +} + +struct pvfs2_bufmap *pvfs2_bufmap_ref(void) +{ + struct pvfs2_bufmap *bufmap = NULL; + + spin_lock(&pvfs2_bufmap_lock); + if (__pvfs2_bufmap) { + bufmap = __pvfs2_bufmap; + atomic_inc(&bufmap->refcnt); + } + spin_unlock(&pvfs2_bufmap_lock); + return bufmap; +} + +void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap) +{ + if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) { + __pvfs2_bufmap = NULL; + spin_unlock(&pvfs2_bufmap_lock); + + pvfs2_bufmap_unmap(bufmap); + pvfs2_bufmap_free(bufmap); + } +} + +inline int pvfs_bufmap_size_query(void) +{ + struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + int size = bufmap ? 
bufmap->desc_size : 0; + + pvfs2_bufmap_unref(bufmap); + return size; +} + +inline int pvfs_bufmap_shift_query(void) +{ + struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + int shift = bufmap ? bufmap->desc_shift : 0; + + pvfs2_bufmap_unref(bufmap); + return shift; +} + +static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq); +static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq); + +/* + * get_bufmap_init + * + * If bufmap_init is 1, then the shared memory system, including the + * buffer_index_array, is available. Otherwise, it is not. + * + * returns the value of bufmap_init + */ +int get_bufmap_init(void) +{ + return __pvfs2_bufmap ? 1 : 0; +} + + +static struct pvfs2_bufmap * +pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc) +{ + struct pvfs2_bufmap *bufmap; + + bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL); + if (!bufmap) + goto out; + + atomic_set(&bufmap->refcnt, 1); + bufmap->total_size = user_desc->total_size; + bufmap->desc_count = user_desc->count; + bufmap->desc_size = user_desc->size; + bufmap->desc_shift = ilog2(bufmap->desc_size); + + spin_lock_init(&bufmap->buffer_index_lock); + bufmap->buffer_index_array = + kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL); + if (!bufmap->buffer_index_array) { + gossip_err("pvfs2: could not allocate %d buffer indices\n", + bufmap->desc_count); + goto out_free_bufmap; + } + spin_lock_init(&bufmap->readdir_index_lock); + + bufmap->desc_array = + kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc), + GFP_KERNEL); + if (!bufmap->desc_array) { + gossip_err("pvfs2: could not allocate %d descriptors\n", + bufmap->desc_count); + goto out_free_index_array; + } + + bufmap->page_count = bufmap->total_size / PAGE_SIZE; + + /* allocate storage to track our page mappings */ + bufmap->page_array = + kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL); + if (!bufmap->page_array) + goto out_free_desc_array; + + return bufmap; + +out_free_desc_array: + kfree(bufmap->desc_array); +out_free_index_array: + 
kfree(bufmap->buffer_index_array); +out_free_bufmap: + kfree(bufmap); +out: + return NULL; +} + +static int +pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap, + struct PVFS_dev_map_desc *user_desc) +{ + int pages_per_desc = bufmap->desc_size / PAGE_SIZE; + int offset = 0, ret, i; + + /* map the pages */ + down_write(¤t->mm->mmap_sem); + ret = get_user_pages(current, + current->mm, + (unsigned long)user_desc->ptr, + bufmap->page_count, + 1, + 0, + bufmap->page_array, + NULL); + up_write(¤t->mm->mmap_sem); + + if (ret < 0) + return ret; + + if (ret != bufmap->page_count) { + gossip_err("pvfs2 error: asked for %d pages, only got %d.\n", + bufmap->page_count, ret); + + for (i = 0; i < ret; i++) { + SetPageError(bufmap->page_array[i]); + page_cache_release(bufmap->page_array[i]); + } + return -ENOMEM; + } + + /* + * ideally we want to get kernel space pointers for each page, but + * we can't kmap that many pages at once if highmem is being used. + * so instead, we just kmap/kunmap the page address each time the + * kaddr is needed. 
+ */ + for (i = 0; i < bufmap->page_count; i++) + flush_dcache_page(bufmap->page_array[i]); + + /* build a list of available descriptors */ + for (offset = 0, i = 0; i < bufmap->desc_count; i++) { + bufmap->desc_array[i].page_array = &bufmap->page_array[offset]; + bufmap->desc_array[i].array_count = pages_per_desc; + bufmap->desc_array[i].uaddr = + (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE)); + offset += pages_per_desc; + } + + return 0; +} + +/* + * pvfs_bufmap_initialize() + * + * initializes the mapped buffer interface + * + * returns 0 on success, -errno on failure + */ +int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) +{ + struct pvfs2_bufmap *bufmap; + int ret = -EINVAL; + + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "pvfs_bufmap_initialize: called (ptr (" + "%p) sz (%d) cnt(%d).\n", + user_desc->ptr, + user_desc->size, + user_desc->count); + + /* + * sanity check alignment and size of buffer that caller wants to + * work with + */ + if (PAGE_ALIGN((unsigned long)user_desc->ptr) != + (unsigned long)user_desc->ptr) { + gossip_err("pvfs2 error: memory alignment (front). 
%p\n", + user_desc->ptr); + goto out; + } + + if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size)) + != (unsigned long)(user_desc->ptr + user_desc->total_size)) { + gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n", + user_desc->ptr, + user_desc->total_size); + goto out; + } + + if (user_desc->total_size != (user_desc->size * user_desc->count)) { + gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n", + user_desc->total_size, + user_desc->size, + user_desc->count); + goto out; + } + + if ((user_desc->size % PAGE_SIZE) != 0) { + gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n", + user_desc->size); + goto out; + } + + ret = -ENOMEM; + bufmap = pvfs2_bufmap_alloc(user_desc); + if (!bufmap) + goto out; + + ret = pvfs2_bufmap_map(bufmap, user_desc); + if (ret) + goto out_free_bufmap; + + + spin_lock(&pvfs2_bufmap_lock); + if (__pvfs2_bufmap) { + spin_unlock(&pvfs2_bufmap_lock); + gossip_err("pvfs2: error: bufmap already initialized.\n"); + ret = -EALREADY; + goto out_unmap_bufmap; + } + __pvfs2_bufmap = bufmap; + spin_unlock(&pvfs2_bufmap_lock); + + /* + * If there are operations in pvfs2_bufmap_init_waitq, wake them up. + * This scenario occurs when the client-core is restarted and I/O + * requests in the in-progress or waiting tables are restarted. I/O + * requests cannot be restarted until the shared memory system is + * completely re-initialized, so we put the I/O requests in this + * waitq until initialization has completed. NOTE: the I/O requests + * are also on a timer, so they don't wait forever just in case the + * client-core doesn't come back up. 
+ */ + wake_up_interruptible(&pvfs2_bufmap_init_waitq); + + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "pvfs_bufmap_initialize: exiting normally\n"); + return 0; + +out_unmap_bufmap: + pvfs2_bufmap_unmap(bufmap); +out_free_bufmap: + pvfs2_bufmap_free(bufmap); +out: + return ret; +} + +/* + * pvfs_bufmap_finalize() + * + * shuts down the mapped buffer interface and releases any resources + * associated with it + * + * no return value + */ +void pvfs_bufmap_finalize(void) +{ + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n"); + BUG_ON(!__pvfs2_bufmap); + pvfs2_bufmap_unref(__pvfs2_bufmap); + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "pvfs2_bufmap_finalize: exiting normally\n"); +} + +struct slot_args { + int slot_count; + int *slot_array; + spinlock_t *slot_lock; + wait_queue_head_t *slot_wq; +}; + +static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) +{ + int ret = -1; + int i = 0; + DECLARE_WAITQUEUE(my_wait, current); + + + add_wait_queue_exclusive(slargs->slot_wq, &my_wait); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * check for available desc, slot_lock is the appropriate + * index_lock + */ + spin_lock(slargs->slot_lock); + for (i = 0; i < slargs->slot_count; i++) + if (slargs->slot_array[i] == 0) { + slargs->slot_array[i] = 1; + *buffer_index = i; + ret = 0; + break; + } + spin_unlock(slargs->slot_lock); + + /* if we acquired a buffer, then break out of while */ + if (ret == 0) + break; + + if (!signal_pending(current)) { + int timeout = + MSECS_TO_JIFFIES(1000 * slot_timeout_secs); + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "[BUFMAP]: waiting %d " + "seconds for a slot\n", + slot_timeout_secs); + if (!schedule_timeout(timeout)) { + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "*** wait_for_a_slot timed out\n"); + ret = -ETIMEDOUT; + break; + } + gossip_debug(GOSSIP_BUFMAP_DEBUG, + "[BUFMAP]: woken up by a slot becoming available.\n"); + continue; + } + + gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n", + 
__func__); + ret = -EINTR; + break; + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(slargs->slot_wq, &my_wait); + return ret; +} + +static void put_back_slot(struct slot_args *slargs, int buffer_index) +{ + /* slot_lock is the appropriate index_lock */ + spin_lock(slargs->slot_lock); + if (buffer_index < 0 || buffer_index >= slargs->slot_count) { + spin_unlock(slargs->slot_lock); + return; + } + + /* put the desc back on the queue */ + slargs->slot_array[buffer_index] = 0; + spin_unlock(slargs->slot_lock); + + /* wake up anyone who may be sleeping on the queue */ + wake_up_interruptible(slargs->slot_wq); +} + +/* + * pvfs_bufmap_get() + * + * gets a free mapped buffer descriptor, will sleep until one becomes + * available if necessary + * + * returns 0 on success, -errno on failure + */ +int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index) +{ + struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + struct slot_args slargs; + int ret; + + if (!bufmap) { + gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n"); + return -EIO; + } + + slargs.slot_count = bufmap->desc_count; + slargs.slot_array = bufmap->buffer_index_array; + slargs.slot_lock = &bufmap->buffer_index_lock; + slargs.slot_wq = &bufmap_waitq; + ret = wait_for_a_slot(&slargs, buffer_index); + if (ret) + pvfs2_bufmap_unref(bufmap); + *mapp = bufmap; + return ret; +} + +/* + * pvfs_bufmap_put() + * + * returns a mapped buffer descriptor to the collection + * + * no return value + */ +void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index) +{ + struct slot_args slargs; + + slargs.slot_count = bufmap->desc_count; + slargs.slot_array = bufmap->buffer_index_array; + slargs.slot_lock = &bufmap->buffer_index_lock; + slargs.slot_wq = &bufmap_waitq; + put_back_slot(&slargs, buffer_index); + pvfs2_bufmap_unref(bufmap); +} + +/* + * readdir_index_get() + * + * gets a free descriptor, will sleep until one becomes + * available if necessary. 
+ * Although the readdir buffers are not mapped into kernel space
+ * we could do that at a later point of time. Regardless, these
+ * indices are used by the client-core.
+ *
+ * returns 0 on success, -errno on failure
+ */
+int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
+{
+	struct pvfs2_bufmap *map = pvfs2_bufmap_ref();
+
+	if (map) {
+		/* describe the readdir slot pool of this bufmap */
+		struct slot_args pool = {
+			.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT,
+			.slot_array = map->readdir_index_array,
+			.slot_lock = &map->readdir_index_lock,
+			.slot_wq = &readdir_waitq,
+		};
+		int rc = wait_for_a_slot(&pool, buffer_index);
+
+		/* no slot obtained: give back the reference taken above */
+		if (rc != 0)
+			pvfs2_bufmap_unref(map);
+		*mapp = map;
+		return rc;
+	}
+
+	gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
+	return -EIO;
+}
+
+/*
+ * Hand a readdir slot back to its pool and drop the bufmap reference
+ * that was held for it.
+ */
+void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
+{
+	struct slot_args pool = {
+		.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT,
+		.slot_array = bufmap->readdir_index_array,
+		.slot_lock = &bufmap->readdir_index_lock,
+		.slot_wq = &readdir_waitq,
+	};
+
+	put_back_slot(&pool, buffer_index);
+	pvfs2_bufmap_unref(bufmap);
+}
+
+/*
+ * pvfs_bufmap_copy_iovec_from_user()
+ *
+ * copies data from several user space address's in an iovec
+ * to a mapped buffer
+ *
+ * Note that the mapped buffer is a series of pages and therefore
+ * the copies have to be split by PAGE_SIZE bytes at a time.
+ * Note that this routine checks that summation of iov_len
+ * across all the elements of iov is equal to size.
+ *
+ * returns 0 on success, -errno on failure
+ * (-ENOMEM if the private iovec copy cannot be allocated, -EINVAL if the
+ * iov_len total differs from size, -EFAULT if copy_from_user() fails)
+ */
+int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap,
+				     int buffer_index,
+				     const struct iovec *iov,
+				     unsigned long nr_segs,
+				     size_t size)
+{
+	size_t ret = 0;
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	unsigned int to_page_offset = 0;
+	unsigned int to_page_index = 0;
+	void *to_kaddr = NULL;
+	void __user *from_addr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *to;
+	unsigned int seg;
+	char *tmp_printer = NULL;
+	int tmp_int = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_iovec_from_user: index %d, "
+		     "size %zd\n",
+		     buffer_index,
+		     size);
+
+	/* NOTE(review): buffer_index is not range checked here */
+	to = &bufmap->desc_array[buffer_index];
+
+	/*
+	 * copy the passed in iovec so that we can change some of its fields
+	 */
+	copied_iovec = kmalloc_array(nr_segs,
+				     sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL)
+		return -ENOMEM;
+
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len matches the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+	if (amt_copied != size) {
+		gossip_err(
+		    "pvfs2_bufmap_copy_iovec_from_user: computed total ("
+		    "%zd) is not equal to (%zd)\n",
+		    amt_copied,
+		    size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	to_page_index = 0;
+	to_page_offset = 0;
+	amt_copied = 0;
+	seg = 0;
+	/*
+	 * Go through each segment in the iovec and copy its
+	 * buffer into the mapped buffer one page at a time though
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		int inc_to_page_index;
+
+		if (iv->iov_len < (PAGE_SIZE - to_page_offset)) {
+			/* segment ends inside this page: stay on the page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			from_addr = iv->iov_base;
+			inc_to_page_index = 0;
+		} else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) {
+			/* segment ends exactly at the page end: next page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			from_addr = iv->iov_base;
+			inc_to_page_index = 1;
+		} else {
+			/*
+			 * segment runs past this page: fill the page, keep
+			 * the remainder in the private iovec copy
+			 */
+			cur_copy_size =
+			    PVFS_util_min(PAGE_SIZE - to_page_offset,
+					  size - amt_copied);
+			from_addr = iv->iov_base;
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+			inc_to_page_index = 1;
+		}
+		to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
+		ret =
+		    copy_from_user(to_kaddr + to_page_offset,
+				   from_addr,
+				   cur_copy_size);
+		if (!PageReserved(to->page_array[to_page_index]))
+			SetPageDirty(to->page_array[to_page_index]);
+
+		/* debug aid: log the first byte of the first chunk only */
+		if (!tmp_printer) {
+			tmp_printer = (char *)(to_kaddr + to_page_offset);
+			tmp_int += tmp_printer[0];
+			gossip_debug(GOSSIP_BUFMAP_DEBUG,
+				     "First character (integer value) in pvfs_bufmap_copy_from_user: %d\n",
+				     tmp_int);
+		}
+
+		/* unmap before acting on the copy result */
+		pvfs2_kunmap(to->page_array[to_page_index]);
+		if (ret) {
+			gossip_err("Failed to copy data from user space\n");
+			kfree(copied_iovec);
+			return -EFAULT;
+		}
+
+		amt_copied += cur_copy_size;
+		if (inc_to_page_index) {
+			to_page_offset = 0;
+			to_page_index++;
+		} else {
+			to_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_copy_iovec_from_kernel()
+ *
+ * copies data from several kernel space address's in an iovec
+ * to a mapped buffer
+ *
+ * Note that the mapped buffer is a series of pages and therefore
+ * the copies have to be split by PAGE_SIZE bytes at a time.
+ * Note that this routine checks that summation of iov_len
+ * across all the elements of iov is equal to size.
+ *
+ * returns 0 on success, -errno on failure
+ * (-ENOMEM if the private iovec copy cannot be allocated, -EINVAL if the
+ * iov_len total differs from size)
+ */
+int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap,
+		int buffer_index, const struct iovec *iov,
+		unsigned long nr_segs, size_t size)
+{
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	int to_page_index = 0;
+	void *to_kaddr = NULL;
+	void *from_kaddr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *to;
+	unsigned int seg;
+	unsigned to_page_offset = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_iovec_from_kernel: index %d, "
+		     "size %zd\n",
+		     buffer_index,
+		     size);
+
+	to = &bufmap->desc_array[buffer_index];
+	/*
+	 * copy the passed in iovec so that we can change some of its fields
+	 */
+	copied_iovec = kmalloc_array(nr_segs,
+				     sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL)
+		return -ENOMEM;
+
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len matches the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+	if (amt_copied != size) {
+		gossip_err("pvfs2_bufmap_copy_iovec_from_kernel: computed total(%zd) is not equal to (%zd)\n",
+			   amt_copied,
+			   size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	to_page_index = 0;
+	amt_copied = 0;
+	seg = 0;
+	to_page_offset = 0;
+	/*
+	 * Go through each segment in the iovec and copy its
+	 * buffer into the mapped buffer one page at a time though
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		int inc_to_page_index;
+
+		if (iv->iov_len < (PAGE_SIZE - to_page_offset)) {
+			/* segment ends inside this page: stay on the page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			from_kaddr = iv->iov_base;
+			inc_to_page_index = 0;
+		} else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) {
+			/* segment ends exactly at the page end: next page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			from_kaddr = iv->iov_base;
+			inc_to_page_index = 1;
+		} else {
+			/*
+			 * segment runs past this page: fill the page, keep
+			 * the remainder in the private iovec copy
+			 */
+			cur_copy_size =
+			    PVFS_util_min(PAGE_SIZE - to_page_offset,
+					  size - amt_copied);
+			from_kaddr = iv->iov_base;
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+			inc_to_page_index = 1;
+		}
+		to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
+		memcpy(to_kaddr + to_page_offset, from_kaddr, cur_copy_size);
+		if (!PageReserved(to->page_array[to_page_index]))
+			SetPageDirty(to->page_array[to_page_index]);
+		pvfs2_kunmap(to->page_array[to_page_index]);
+		amt_copied += cur_copy_size;
+		if (inc_to_page_index) {
+			to_page_offset = 0;
+			to_page_index++;
+		} else {
+			to_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_copy_to_user_iovec()
+ *
+ * copies data to several user space address's in an iovec
+ * from a mapped buffer
+ *
+ * Unlike the copy-in routines, the iov_len total only has to be at least
+ * size, not equal to it.
+ *
+ * returns 0 on success, -errno on failure
+ * (-ENOMEM if the private iovec copy cannot be allocated, -EINVAL if the
+ * iov_len total is less than size, -EFAULT if copy_to_user() fails)
+ */
+int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap,
+		int buffer_index, const struct iovec *iov,
+		unsigned long nr_segs, size_t size)
+{
+	size_t ret = 0;
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	int from_page_index = 0;
+	void *from_kaddr = NULL;
+	void __user *to_addr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *from;
+	unsigned int seg;
+	unsigned from_page_offset = 0;
+	char *tmp_printer = NULL;
+	int tmp_int = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_to_user_iovec: index %d, size %zd\n",
+		     buffer_index,
+		     size);
+
+	from = &bufmap->desc_array[buffer_index];
+	/*
+	 * copy the passed in iovec so that we can change some of its fields
+	 */
+	copied_iovec = kmalloc_array(nr_segs,
+				     sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL)
+		return -ENOMEM;
+
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len is at least the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+	if (amt_copied < size) {
+		gossip_err("pvfs2_bufmap_copy_to_user_iovec: computed total (%zd) is less than (%zd)\n",
+			   amt_copied,
+			   size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	from_page_index = 0;
+	amt_copied = 0;
+	seg = 0;
+	from_page_offset = 0;
+	/*
+	 * Go through each segment in the iovec and copy from the mapped buffer,
+	 * but make sure that we do so one page at a time.
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		int inc_from_page_index;
+
+		if (iv->iov_len < (PAGE_SIZE - from_page_offset)) {
+			/* segment ends inside this page: stay on the page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			to_addr = iv->iov_base;
+			inc_from_page_index = 0;
+		} else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) {
+			/* segment ends exactly at the page end: next page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			to_addr = iv->iov_base;
+			inc_from_page_index = 1;
+		} else {
+			/*
+			 * segment runs past this page: drain the page, keep
+			 * the remainder in the private iovec copy
+			 */
+			cur_copy_size =
+			    PVFS_util_min(PAGE_SIZE - from_page_offset,
+					  size - amt_copied);
+			to_addr = iv->iov_base;
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+			inc_from_page_index = 1;
+		}
+		from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
+		/* debug aid: log the first byte of the first chunk only */
+		if (!tmp_printer) {
+			tmp_printer = (char *)(from_kaddr + from_page_offset);
+			tmp_int += tmp_printer[0];
+			gossip_debug(GOSSIP_BUFMAP_DEBUG,
+				     "First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n",
+				     tmp_int);
+		}
+		ret =
+		    copy_to_user(to_addr,
+				 from_kaddr + from_page_offset,
+				 cur_copy_size);
+		/* unmap before acting on the copy result */
+		pvfs2_kunmap(from->page_array[from_page_index]);
+		if (ret) {
+			gossip_err("Failed to copy data to user space\n");
+			kfree(copied_iovec);
+			return -EFAULT;
+		}
+
+		amt_copied += cur_copy_size;
+		if (inc_from_page_index) {
+			from_page_offset = 0;
+			from_page_index++;
+		} else {
+			from_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
+
+/*
+ * pvfs_bufmap_copy_to_kernel_iovec()
+ *
+ * copies data to several kernel space address's in an iovec
+ * from a mapped buffer
+ *
+ * The iov_len total only has to be at least size, not equal to it.
+ *
+ * returns 0 on success, -errno on failure
+ * (-ENOMEM if the private iovec copy cannot be allocated, -EINVAL if the
+ * iov_len total is less than size)
+ */
+int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap,
+		int buffer_index, const struct iovec *iov,
+		unsigned long nr_segs, size_t size)
+{
+	size_t amt_copied = 0;
+	size_t cur_copy_size = 0;
+	int from_page_index = 0;
+	void *from_kaddr = NULL;
+	void *to_kaddr = NULL;
+	struct iovec *copied_iovec = NULL;
+	struct pvfs_bufmap_desc *from;
+	unsigned int seg;
+	unsigned int from_page_offset = 0;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "pvfs_bufmap_copy_to_kernel_iovec: index %d, size %zd\n",
+		      buffer_index,
+		      size);
+
+	from = &bufmap->desc_array[buffer_index];
+	/*
+	 * copy the passed in iovec so that we can change some of its fields
+	 */
+	copied_iovec = kmalloc_array(nr_segs,
+				     sizeof(*copied_iovec),
+				     PVFS2_BUFMAP_GFP_FLAGS);
+	if (copied_iovec == NULL)
+		return -ENOMEM;
+
+	memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
+	/*
+	 * Go through each segment in the iovec and make sure that
+	 * the summation of iov_len is at least the given size.
+	 */
+	for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
+		amt_copied += copied_iovec[seg].iov_len;
+
+	if (amt_copied < size) {
+		gossip_err("pvfs2_bufmap_copy_to_kernel_iovec: computed total (%zd) is less than (%zd)\n",
+			   amt_copied,
+			   size);
+		kfree(copied_iovec);
+		return -EINVAL;
+	}
+
+	from_page_index = 0;
+	amt_copied = 0;
+	seg = 0;
+	from_page_offset = 0;
+	/*
+	 * Go through each segment in the iovec and copy from the mapped buffer,
+	 * but make sure that we do so one page at a time.
+	 */
+	while (amt_copied < size) {
+		struct iovec *iv = &copied_iovec[seg];
+		int inc_from_page_index;
+
+		if (iv->iov_len < (PAGE_SIZE - from_page_offset)) {
+			/* segment ends inside this page: stay on the page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			to_kaddr = iv->iov_base;
+			inc_from_page_index = 0;
+		} else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) {
+			/* segment ends exactly at the page end: next page */
+			cur_copy_size =
+			    PVFS_util_min(iv->iov_len, size - amt_copied);
+			seg++;
+			to_kaddr = iv->iov_base;
+			inc_from_page_index = 1;
+		} else {
+			/*
+			 * segment runs past this page: drain the page, keep
+			 * the remainder in the private iovec copy
+			 */
+			cur_copy_size =
+			    PVFS_util_min(PAGE_SIZE - from_page_offset,
+					  size - amt_copied);
+			to_kaddr = iv->iov_base;
+			iv->iov_base += cur_copy_size;
+			iv->iov_len -= cur_copy_size;
+			inc_from_page_index = 1;
+		}
+		from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
+		memcpy(to_kaddr, from_kaddr + from_page_offset, cur_copy_size);
+		pvfs2_kunmap(from->page_array[from_page_index]);
+		amt_copied += cur_copy_size;
+		if (inc_from_page_index) {
+			from_page_offset = 0;
+			from_page_index++;
+		} else {
+			from_page_offset += cur_copy_size;
+		}
+	}
+	kfree(copied_iovec);
+	return 0;
+}
diff --git a/fs/orangefs/pvfs2-cache.c b/fs/orangefs/pvfs2-cache.c
new file mode 100644
index 000000000000..15251884ba4a
--- /dev/null
+++ b/fs/orangefs/pvfs2-cache.c
@@ -0,0 +1,260 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */ + +#include "protocol.h" +#include "pvfs2-kernel.h" + +/* tags assigned to kernel upcall operations */ +static __u64 next_tag_value; +static DEFINE_SPINLOCK(next_tag_value_lock); + +/* the pvfs2 memory caches */ + +/* a cache for pvfs2 upcall/downcall operations */ +static struct kmem_cache *op_cache; + +/* a cache for device (/dev/pvfs2-req) communication */ +static struct kmem_cache *dev_req_cache; + +/* a cache for pvfs2_kiocb objects (i.e pvfs2 iocb structures ) */ +static struct kmem_cache *pvfs2_kiocb_cache; + +int op_cache_initialize(void) +{ + op_cache = kmem_cache_create("pvfs2_op_cache", + sizeof(struct pvfs2_kernel_op_s), + 0, + PVFS2_CACHE_CREATE_FLAGS, + NULL); + + if (!op_cache) { + gossip_err("Cannot create pvfs2_op_cache\n"); + return -ENOMEM; + } + + /* initialize our atomic tag counter */ + spin_lock(&next_tag_value_lock); + next_tag_value = 100; + spin_unlock(&next_tag_value_lock); + return 0; +} + +int op_cache_finalize(void) +{ + kmem_cache_destroy(op_cache); + return 0; +} + +char *get_opname_string(struct pvfs2_kernel_op_s *new_op) +{ + if (new_op) { + __s32 type = new_op->upcall.type; + + if (type == PVFS2_VFS_OP_FILE_IO) + return "OP_FILE_IO"; + else if (type == PVFS2_VFS_OP_LOOKUP) + return "OP_LOOKUP"; + else if (type == PVFS2_VFS_OP_CREATE) + return "OP_CREATE"; + else if (type == PVFS2_VFS_OP_GETATTR) + return "OP_GETATTR"; + else if (type == PVFS2_VFS_OP_REMOVE) + return "OP_REMOVE"; + else if (type == PVFS2_VFS_OP_MKDIR) + return "OP_MKDIR"; + else if (type == PVFS2_VFS_OP_READDIR) + return "OP_READDIR"; + else if (type == PVFS2_VFS_OP_READDIRPLUS) + return "OP_READDIRPLUS"; + else if (type == PVFS2_VFS_OP_SETATTR) + return "OP_SETATTR"; + else if (type == PVFS2_VFS_OP_SYMLINK) + return "OP_SYMLINK"; + else if (type == PVFS2_VFS_OP_RENAME) + return "OP_RENAME"; + else if (type == PVFS2_VFS_OP_STATFS) + return "OP_STATFS"; + else if (type == PVFS2_VFS_OP_TRUNCATE) + return "OP_TRUNCATE"; + else if (type == 
PVFS2_VFS_OP_MMAP_RA_FLUSH) + return "OP_MMAP_RA_FLUSH"; + else if (type == PVFS2_VFS_OP_FS_MOUNT) + return "OP_FS_MOUNT"; + else if (type == PVFS2_VFS_OP_FS_UMOUNT) + return "OP_FS_UMOUNT"; + else if (type == PVFS2_VFS_OP_GETXATTR) + return "OP_GETXATTR"; + else if (type == PVFS2_VFS_OP_SETXATTR) + return "OP_SETXATTR"; + else if (type == PVFS2_VFS_OP_LISTXATTR) + return "OP_LISTXATTR"; + else if (type == PVFS2_VFS_OP_REMOVEXATTR) + return "OP_REMOVEXATTR"; + else if (type == PVFS2_VFS_OP_PARAM) + return "OP_PARAM"; + else if (type == PVFS2_VFS_OP_PERF_COUNT) + return "OP_PERF_COUNT"; + else if (type == PVFS2_VFS_OP_CANCEL) + return "OP_CANCEL"; + else if (type == PVFS2_VFS_OP_FSYNC) + return "OP_FSYNC"; + else if (type == PVFS2_VFS_OP_FSKEY) + return "OP_FSKEY"; + else if (type == PVFS2_VFS_OP_FILE_IOX) + return "OP_FILE_IOX"; + } + return "OP_UNKNOWN?"; +} + +static struct pvfs2_kernel_op_s *op_alloc_common(__s32 op_linger, __s32 type) +{ + struct pvfs2_kernel_op_s *new_op = NULL; + + new_op = kmem_cache_alloc(op_cache, PVFS2_CACHE_ALLOC_FLAGS); + if (new_op) { + memset(new_op, 0, sizeof(struct pvfs2_kernel_op_s)); + + INIT_LIST_HEAD(&new_op->list); + spin_lock_init(&new_op->lock); + init_waitqueue_head(&new_op->waitq); + + init_waitqueue_head(&new_op->io_completion_waitq); + atomic_set(&new_op->aio_ref_count, 0); + + pvfs2_op_initialize(new_op); + + /* initialize the op specific tag and upcall credentials */ + spin_lock(&next_tag_value_lock); + new_op->tag = next_tag_value++; + if (next_tag_value == 0) + next_tag_value = 100; + spin_unlock(&next_tag_value_lock); + new_op->upcall.type = type; + new_op->attempts = 0; + gossip_debug(GOSSIP_CACHE_DEBUG, + "Alloced OP (%p: %llu %s)\n", + new_op, + llu(new_op->tag), + get_opname_string(new_op)); + + new_op->upcall.uid = from_kuid(current_user_ns(), + current_fsuid()); + + new_op->upcall.gid = from_kgid(current_user_ns(), + current_fsgid()); + + new_op->op_linger = new_op->op_linger_tmp = op_linger; + } else { + 
gossip_err("op_alloc: kmem_cache_alloc failed!\n"); + } + return new_op; +} + +struct pvfs2_kernel_op_s *op_alloc(__s32 type) +{ + return op_alloc_common(1, type); +} + +struct pvfs2_kernel_op_s *op_alloc_trailer(__s32 type) +{ + return op_alloc_common(2, type); +} + +void op_release(struct pvfs2_kernel_op_s *pvfs2_op) +{ + if (pvfs2_op) { + gossip_debug(GOSSIP_CACHE_DEBUG, + "Releasing OP (%p: %llu)\n", + pvfs2_op, + llu(pvfs2_op->tag)); + pvfs2_op_initialize(pvfs2_op); + kmem_cache_free(op_cache, pvfs2_op); + } else { + gossip_err("NULL pointer in op_release\n"); + } +} + +int dev_req_cache_initialize(void) +{ + dev_req_cache = kmem_cache_create("pvfs2_devreqcache", + MAX_ALIGNED_DEV_REQ_DOWNSIZE, + 0, + PVFS2_CACHE_CREATE_FLAGS, + NULL); + + if (!dev_req_cache) { + gossip_err("Cannot create pvfs2_dev_req_cache\n"); + return -ENOMEM; + } + return 0; +} + +int dev_req_cache_finalize(void) +{ + kmem_cache_destroy(dev_req_cache); + return 0; +} + +void *dev_req_alloc(void) +{ + void *buffer; + + buffer = kmem_cache_alloc(dev_req_cache, PVFS2_CACHE_ALLOC_FLAGS); + if (buffer == NULL) + gossip_err("Failed to allocate from dev_req_cache\n"); + else + memset(buffer, 0, sizeof(MAX_ALIGNED_DEV_REQ_DOWNSIZE)); + return buffer; +} + +void dev_req_release(void *buffer) +{ + if (buffer) + kmem_cache_free(dev_req_cache, buffer); + else + gossip_err("NULL pointer passed to dev_req_release\n"); +} + +int kiocb_cache_initialize(void) +{ + pvfs2_kiocb_cache = kmem_cache_create("pvfs2_kiocbcache", + sizeof(struct pvfs2_kiocb_s), + 0, + PVFS2_CACHE_CREATE_FLAGS, + NULL); + + if (!pvfs2_kiocb_cache) { + gossip_err("Cannot create pvfs2_kiocb_cache!\n"); + return -ENOMEM; + } + return 0; +} + +int kiocb_cache_finalize(void) +{ + kmem_cache_destroy(pvfs2_kiocb_cache); + return 0; +} + +struct pvfs2_kiocb_s *kiocb_alloc(void) +{ + struct pvfs2_kiocb_s *x = NULL; + + x = kmem_cache_alloc(pvfs2_kiocb_cache, PVFS2_CACHE_ALLOC_FLAGS); + if (x == NULL) + gossip_err("kiocb_alloc: 
kmem_cache_alloc failed!\n"); + else + memset(x, 0, sizeof(struct pvfs2_kiocb_s)); + return x; +} + +void kiocb_release(struct pvfs2_kiocb_s *x) +{ + if (x) + kmem_cache_free(pvfs2_kiocb_cache, x); + else + gossip_err("kiocb_release: kmem_cache_free NULL pointer!\n"); +} diff --git a/fs/orangefs/pvfs2-debugfs.c b/fs/orangefs/pvfs2-debugfs.c new file mode 100644 index 000000000000..8d118da9b88f --- /dev/null +++ b/fs/orangefs/pvfs2-debugfs.c @@ -0,0 +1,458 @@ +/* + * What: /sys/kernel/debug/orangefs/debug-help + * Date: June 2015 + * Contact: Mike Marshall + * Description: + * List of client and kernel debug keywords. + * + * + * What: /sys/kernel/debug/orangefs/client-debug + * Date: June 2015 + * Contact: Mike Marshall + * Description: + * Debug setting for "the client", the userspace + * helper for the kernel module. + * + * + * What: /sys/kernel/debug/orangefs/kernel-debug + * Date: June 2015 + * Contact: Mike Marshall + * Description: + * Debug setting for the orangefs kernel module. + * + * Any of the keywords, or comma-separated lists + * of keywords, from debug-help can be catted to + * client-debug or kernel-debug. + * + * "none", "all" and "verbose" are special keywords + * for client-debug. Setting client-debug to "all" + * is kind of like trying to drink water from a + * fire hose, "verbose" triggers most of the same + * output except for the constant flow of output + * from the main wait loop. + * + * "none" and "all" are similar settings for kernel-debug + * no need for a "verbose". 
+ */ +#include +#include + +#include + +#include "pvfs2-debugfs.h" +#include "protocol.h" +#include "pvfs2-kernel.h" + +static int orangefs_debug_disabled = 1; + +static int orangefs_debug_help_open(struct inode *, struct file *); + +const struct file_operations debug_help_fops = { + .open = orangefs_debug_help_open, + .read = seq_read, + .release = seq_release, + .llseek = seq_lseek, +}; + +static void *help_start(struct seq_file *, loff_t *); +static void *help_next(struct seq_file *, void *, loff_t *); +static void help_stop(struct seq_file *, void *); +static int help_show(struct seq_file *, void *); + +static const struct seq_operations help_debug_ops = { + .start = help_start, + .next = help_next, + .stop = help_stop, + .show = help_show, +}; + +/* + * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and + * ORANGEFS_KMOD_DEBUG_FILE. + */ +DEFINE_MUTEX(orangefs_debug_lock); + +int orangefs_debug_open(struct inode *, struct file *); + +static ssize_t orangefs_debug_read(struct file *, + char __user *, + size_t, + loff_t *); + +static ssize_t orangefs_debug_write(struct file *, + const char __user *, + size_t, + loff_t *); + +static const struct file_operations kernel_debug_fops = { + .open = orangefs_debug_open, + .read = orangefs_debug_read, + .write = orangefs_debug_write, + .llseek = generic_file_llseek, +}; + +/* + * initialize kmod debug operations, create orangefs debugfs dir and + * ORANGEFS_KMOD_DEBUG_HELP_FILE. 
+ */ +int pvfs2_debugfs_init(void) +{ + + int rc = -ENOMEM; + + debug_dir = debugfs_create_dir("orangefs", NULL); + if (!debug_dir) + goto out; + + help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, + 0444, + debug_dir, + debug_help_string, + &debug_help_fops); + if (!help_file_dentry) + goto out; + + orangefs_debug_disabled = 0; + rc = 0; + +out: + if (rc) + pvfs2_debugfs_cleanup(); + + return rc; +} + +void pvfs2_debugfs_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +/* open ORANGEFS_KMOD_DEBUG_HELP_FILE */ +static int orangefs_debug_help_open(struct inode *inode, struct file *file) +{ + int rc = -ENODEV; + int ret; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "orangefs_debug_help_open: start\n"); + + if (orangefs_debug_disabled) + goto out; + + ret = seq_open(file, &help_debug_ops); + if (ret) + goto out; + + ((struct seq_file *)(file->private_data))->private = inode->i_private; + + rc = 0; + +out: + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "orangefs_debug_help_open: rc:%d:\n", + rc); + return rc; +} + +/* + * I think start always gets called again after stop. Start + * needs to return NULL when it is done. The whole "payload" + * in this case is a single (long) string, so by the second + * time we get to start (pos = 1), we're done. + */ +static void *help_start(struct seq_file *m, loff_t *pos) +{ + void *payload = NULL; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n"); + + if (*pos == 0) + payload = m->private; + + return payload; +} + +static void *help_next(struct seq_file *m, void *v, loff_t *pos) +{ + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); + + return NULL; +} + +static void help_stop(struct seq_file *m, void *p) +{ + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n"); +} + +static int help_show(struct seq_file *m, void *v) +{ + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_show: start\n"); + + seq_puts(m, v); + + return 0; +} + +/* + * initialize the kernel-debug file. 
+ */ +int pvfs2_kernel_debug_init(void) +{ + + int rc = -ENOMEM; + struct dentry *ret; + char *k_buffer = NULL; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); + + k_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + if (!k_buffer) + goto out; + + if (strlen(kernel_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) { + strcpy(k_buffer, kernel_debug_string); + strcat(k_buffer, "\n"); + } else { + strcpy(k_buffer, "none\n"); + pr_info("%s: overflow 1!\n", __func__); + } + + ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, + 0444, + debug_dir, + k_buffer, + &kernel_debug_fops); + if (!ret) { + pr_info("%s: failed to create %s.\n", + __func__, + ORANGEFS_KMOD_DEBUG_FILE); + goto out; + } + + rc = 0; + +out: + if (rc) + pvfs2_debugfs_cleanup(); + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); + return rc; +} + +/* + * initialize the client-debug file. + */ +int pvfs2_client_debug_init(void) +{ + + int rc = -ENOMEM; + char *c_buffer = NULL; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); + + c_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + if (!c_buffer) + goto out; + + if (strlen(client_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) { + strcpy(c_buffer, client_debug_string); + strcat(c_buffer, "\n"); + } else { + strcpy(c_buffer, "none\n"); + pr_info("%s: overflow! 
2\n", __func__); + } + + client_debug_dentry = debugfs_create_file(ORANGEFS_CLIENT_DEBUG_FILE, + 0444, + debug_dir, + c_buffer, + &kernel_debug_fops); + if (!client_debug_dentry) { + pr_info("%s: failed to create %s.\n", + __func__, + ORANGEFS_CLIENT_DEBUG_FILE); + goto out; + } + + rc = 0; + +out: + if (rc) + pvfs2_debugfs_cleanup(); + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); + return rc; +} + +/* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/ +int orangefs_debug_open(struct inode *inode, struct file *file) +{ + int rc = -ENODEV; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "%s: orangefs_debug_disabled: %d\n", + __func__, + orangefs_debug_disabled); + + if (orangefs_debug_disabled) + goto out; + + rc = 0; + mutex_lock(&orangefs_debug_lock); + file->private_data = inode->i_private; + mutex_unlock(&orangefs_debug_lock); + +out: + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "orangefs_debug_open: rc: %d\n", + rc); + return rc; +} + +static ssize_t orangefs_debug_read(struct file *file, + char __user *ubuf, + size_t count, + loff_t *ppos) +{ + char *buf; + int sprintf_ret; + ssize_t read_ret = -ENOMEM; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "orangefs_debug_read: start\n"); + + buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + if (!buf) + goto out; + + mutex_lock(&orangefs_debug_lock); + sprintf_ret = sprintf(buf, "%s", (char *)file->private_data); + mutex_unlock(&orangefs_debug_lock); + + read_ret = simple_read_from_buffer(ubuf, count, ppos, buf, sprintf_ret); + + kfree(buf); + +out: + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "orangefs_debug_read: ret: %zu\n", + read_ret); + + return read_ret; +} + +static ssize_t orangefs_debug_write(struct file *file, + const char __user *ubuf, + size_t count, + loff_t *ppos) +{ + char *buf; + int rc = -EFAULT; + size_t silly = 0; + char *debug_string; + struct pvfs2_kernel_op_s *new_op = NULL; + struct client_debug_mask c_mask = { NULL, 0, 0 }; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + 
"orangefs_debug_write: %s\n", + file->f_path.dentry->d_name.name); + + /* + * Thwart users who try to jamb a ridiculous number + * of bytes into the debug file... + */ + if (count > PVFS2_MAX_DEBUG_STRING_LEN + 1) { + silly = count; + count = PVFS2_MAX_DEBUG_STRING_LEN + 1; + } + + buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + if (!buf) + goto out; + memset(buf, 0, PVFS2_MAX_DEBUG_STRING_LEN); + + if (copy_from_user(buf, ubuf, count - 1)) { + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "%s: copy_from_user failed!\n", + __func__); + goto out; + } + + /* + * Map the keyword string from userspace into a valid debug mask. + * The mapping process involves mapping the human-inputted string + * into a valid mask, and then rebuilding the string from the + * verified valid mask. + * + * A service operation is required to set a new client-side + * debug mask. + */ + if (!strcmp(file->f_path.dentry->d_name.name, + ORANGEFS_KMOD_DEBUG_FILE)) { + debug_string_to_mask(buf, &gossip_debug_mask, 0); + debug_mask_to_string(&gossip_debug_mask, 0); + debug_string = kernel_debug_string; + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "New kernel debug string is %s\n", + kernel_debug_string); + } else { + /* Can't reset client debug mask if client is not running. */ + if (is_daemon_in_service()) { + pr_info("%s: Client not running :%d:\n", + __func__, + is_daemon_in_service()); + goto out; + } + + debug_string_to_mask(buf, &c_mask, 1); + debug_mask_to_string(&c_mask, 1); + debug_string = client_debug_string; + + new_op = op_alloc(PVFS2_VFS_OP_PARAM); + if (!new_op) { + pr_info("%s: op_alloc failed!\n", __func__); + goto out; + } + + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES; + new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET; + memset(new_op->upcall.req.param.s_value, + 0, + PVFS2_MAX_DEBUG_STRING_LEN); + sprintf(new_op->upcall.req.param.s_value, + "%llx %llx\n", + c_mask.mask1, + c_mask.mask2); + + /* service_operation returns 0 on success... 
*/ + rc = service_operation(new_op, + "pvfs2_param", + PVFS2_OP_INTERRUPTIBLE); + + if (rc) + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "%s: service_operation failed! rc:%d:\n", + __func__, + rc); + + op_release(new_op); + } + + mutex_lock(&orangefs_debug_lock); + memset(file->f_inode->i_private, 0, PVFS2_MAX_DEBUG_STRING_LEN); + sprintf((char *)file->f_inode->i_private, "%s\n", debug_string); + mutex_unlock(&orangefs_debug_lock); + + *ppos += count; + if (silly) + rc = silly; + else + rc = count; + +out: + gossip_debug(GOSSIP_DEBUGFS_DEBUG, + "orangefs_debug_write: rc: %d\n", + rc); + kfree(buf); + return rc; +} diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/pvfs2-mod.c new file mode 100644 index 000000000000..9cbc992731d6 --- /dev/null +++ b/fs/orangefs/pvfs2-mod.c @@ -0,0 +1,316 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * Changes by Acxiom Corporation to add proc file handler for pvfs2 client + * parameters, Copyright Acxiom Corporation, 2005. + * + * See COPYING in top-level directory. 
+ */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-debugfs.h" +#include "pvfs2-sysfs.h" + +/* PVFS2_VERSION is a ./configure define */ +#ifndef PVFS2_VERSION +#define PVFS2_VERSION "Unknown" +#endif + +/* + * global variables declared here + */ + +/* array of client debug keyword/mask values */ +struct client_debug_mask *cdm_array; +int cdm_element_count; + +char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none"; +char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; + +char *debug_help_string; +int help_string_initialized; +struct dentry *help_file_dentry; +struct dentry *client_debug_dentry; +struct dentry *debug_dir; +int client_verbose_index; +int client_all_index; +struct pvfs2_stats g_pvfs2_stats; + +/* the size of the hash tables for ops in progress */ +int hash_table_size = 509; + +static ulong module_parm_debug_mask; +__u64 gossip_debug_mask; +struct client_debug_mask client_debug_mask = { NULL, 0, 0 }; +unsigned int kernel_mask_set_mod_init; /* implicitly false */ +int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS; +int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS; +__u32 DEBUG_LINE = 50; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("PVFS2 Development Team"); +MODULE_DESCRIPTION("The Linux Kernel VFS interface to PVFS2"); +MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see pvfs2-debug.h for values)"); +MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds"); +MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds"); +MODULE_PARM_DESC(hash_table_size, + "size of hash table for operations in progress"); + +static struct file_system_type pvfs2_fs_type = { + .name = "pvfs2", + .mount = pvfs2_mount, + .kill_sb = pvfs2_kill_sb, + .owner = THIS_MODULE, +}; + +module_param(hash_table_size, int, 0); +module_param(module_parm_debug_mask, ulong, 0755); +module_param(op_timeout_secs, int, 0); +module_param(slot_timeout_secs, int, 0); + +/* 
synchronizes the request device file */ +struct mutex devreq_mutex; + +/* + blocks non-priority requests from being queued for servicing. this + could be used for protecting the request list data structure, but + for now it's only being used to stall the op addition to the request + list +*/ +struct mutex request_mutex; + +/* hash table for storing operations waiting for matching downcall */ +struct list_head *htable_ops_in_progress; +DEFINE_SPINLOCK(htable_ops_in_progress_lock); + +/* list for queueing upcall operations */ +LIST_HEAD(pvfs2_request_list); + +/* used to protect the above pvfs2_request_list */ +DEFINE_SPINLOCK(pvfs2_request_list_lock); + +/* used for incoming request notification */ +DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq); + +static int __init pvfs2_init(void) +{ + int ret = -1; + __u32 i = 0; + + /* convert input debug mask to a 64-bit unsigned integer */ + gossip_debug_mask = (unsigned long long) module_parm_debug_mask; + + /* + * set the kernel's gossip debug string; invalid mask values will + * be ignored. + */ + debug_mask_to_string(&gossip_debug_mask, 0); + + /* remove any invalid values from the mask */ + debug_string_to_mask(kernel_debug_string, &gossip_debug_mask, 0); + + /* + * if the mask has a non-zero value, then indicate that the mask + * was set when the kernel module was loaded. The pvfs2 dev ioctl + * command will look at this boolean to determine if the kernel's + * debug mask should be overwritten when the client-core is started. 
+ */ + if (gossip_debug_mask != 0) + kernel_mask_set_mod_init = true; + + /* print information message to the system log */ + pr_info("pvfs2: pvfs2_init called with debug mask: :%s: :%llx:\n", + kernel_debug_string, + (unsigned long long)gossip_debug_mask); + + ret = bdi_init(&pvfs2_backing_dev_info); + + if (ret) + return ret; + + if (op_timeout_secs < 0) + op_timeout_secs = 0; + + if (slot_timeout_secs < 0) + slot_timeout_secs = 0; + + /* initialize global book keeping data structures */ + ret = op_cache_initialize(); + if (ret < 0) + goto err; + + ret = dev_req_cache_initialize(); + if (ret < 0) + goto cleanup_op; + + ret = pvfs2_inode_cache_initialize(); + if (ret < 0) + goto cleanup_req; + + ret = kiocb_cache_initialize(); + if (ret < 0) + goto cleanup_inode; + + /* Initialize the pvfsdev subsystem. */ + ret = pvfs2_dev_init(); + if (ret < 0) { + gossip_err("pvfs2: could not initialize device subsystem %d!\n", + ret); + goto cleanup_kiocb; + } + + mutex_init(&devreq_mutex); + mutex_init(&request_mutex); + + htable_ops_in_progress = + kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL); + if (!htable_ops_in_progress) { + gossip_err("Failed to initialize op hashtable"); + ret = -ENOMEM; + goto cleanup_device; + } + + /* initialize a doubly linked at each hash table index */ + for (i = 0; i < hash_table_size; i++) + INIT_LIST_HEAD(&htable_ops_in_progress[i]); + + ret = fsid_key_table_initialize(); + if (ret < 0) + goto cleanup_progress_table; + + /* + * Build the contents of /sys/kernel/debug/orangefs/debug-help + * from the keywords in the kernel keyword/mask array. + * + * The keywords in the client keyword/mask array are + * unknown at boot time. + * + * orangefs_prepare_debugfs_help_string will be used again + * later to rebuild the debug-help file after the client starts + * and passes along the needed info. The argument signifies + * which time orangefs_prepare_debugfs_help_string is being + * called. 
+ * + */ + ret = orangefs_prepare_debugfs_help_string(1); + if (ret) + goto out; + + pvfs2_debugfs_init(); + pvfs2_kernel_debug_init(); + orangefs_sysfs_init(); + + ret = register_filesystem(&pvfs2_fs_type); + if (ret == 0) { + pr_info("pvfs2: module version %s loaded\n", PVFS2_VERSION); + return 0; + } + + pvfs2_debugfs_cleanup(); + orangefs_sysfs_exit(); + fsid_key_table_finalize(); + +cleanup_progress_table: + kfree(htable_ops_in_progress); + +cleanup_device: + pvfs2_dev_cleanup(); + +cleanup_kiocb: + kiocb_cache_finalize(); + +cleanup_inode: + pvfs2_inode_cache_finalize(); + +cleanup_req: + dev_req_cache_finalize(); + +cleanup_op: + op_cache_finalize(); + +err: + bdi_destroy(&pvfs2_backing_dev_info); + +out: + return ret; +} + +static void __exit pvfs2_exit(void) +{ + int i = 0; + struct pvfs2_kernel_op_s *cur_op = NULL; + + gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2: pvfs2_exit called\n"); + + unregister_filesystem(&pvfs2_fs_type); + pvfs2_debugfs_cleanup(); + orangefs_sysfs_exit(); + fsid_key_table_finalize(); + pvfs2_dev_cleanup(); + /* clear out all pending upcall op requests */ + spin_lock(&pvfs2_request_list_lock); + while (!list_empty(&pvfs2_request_list)) { + cur_op = list_entry(pvfs2_request_list.next, + struct pvfs2_kernel_op_s, + list); + list_del(&cur_op->list); + gossip_debug(GOSSIP_INIT_DEBUG, + "Freeing unhandled upcall request type %d\n", + cur_op->upcall.type); + op_release(cur_op); + } + spin_unlock(&pvfs2_request_list_lock); + + for (i = 0; i < hash_table_size; i++) + while (!list_empty(&htable_ops_in_progress[i])) { + cur_op = list_entry(htable_ops_in_progress[i].next, + struct pvfs2_kernel_op_s, + list); + op_release(cur_op); + } + + kiocb_cache_finalize(); + pvfs2_inode_cache_finalize(); + dev_req_cache_finalize(); + op_cache_finalize(); + + kfree(htable_ops_in_progress); + + bdi_destroy(&pvfs2_backing_dev_info); + + pr_info("pvfs2: module version %s unloaded\n", PVFS2_VERSION); +} + +/* + * What we do in this function is to walk the list of 
operations + * that are in progress in the hash table and mark them as purged as well. + */ +void purge_inprogress_ops(void) +{ + int i; + + for (i = 0; i < hash_table_size; i++) { + struct pvfs2_kernel_op_s *op; + struct pvfs2_kernel_op_s *next; + + list_for_each_entry_safe(op, + next, + &htable_ops_in_progress[i], + list) { + spin_lock(&op->lock); + gossip_debug(GOSSIP_INIT_DEBUG, + "pvfs2-client-core: purging in-progress op tag " + "%llu %s\n", + llu(op->tag), + get_opname_string(op)); + set_op_state_purged(op); + spin_unlock(&op->lock); + wake_up_interruptible(&op->waitq); + } + } +} + +module_init(pvfs2_init); +module_exit(pvfs2_exit); From f7be4ee07fb72a516563bc2870ef41fa589a964a Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 17 Jul 2015 10:38:14 -0400 Subject: [PATCH 004/174] Orangefs: kernel client part 4 Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-sysfs.c | 1787 +++++++++++++++++++++++++++++++++++++ fs/orangefs/pvfs2-utils.c | 1128 +++++++++++++++++++++++ 2 files changed, 2915 insertions(+) create mode 100644 fs/orangefs/pvfs2-sysfs.c create mode 100644 fs/orangefs/pvfs2-utils.c diff --git a/fs/orangefs/pvfs2-sysfs.c b/fs/orangefs/pvfs2-sysfs.c new file mode 100644 index 000000000000..6d0e18b7239f --- /dev/null +++ b/fs/orangefs/pvfs2-sysfs.c @@ -0,0 +1,1787 @@ +/* + * Documentation/ABI/stable/orangefs-sysfs: + * + * What: /sys/fs/orangefs/perf_counter_reset + * Date: June 2015 + * Contact: Mike Marshall + * Description: + * echo a 0 or a 1 into perf_counter_reset to + * reset all the counters in + * /sys/fs/orangefs/perf_counters + * except ones with PINT_PERF_PRESERVE set. + * + * + * What: /sys/fs/orangefs/perf_counters/... + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Counters and settings for various caches. + * Read only. + * + * + * What: /sys/fs/orangefs/perf_time_interval_secs + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Length of perf counter intervals in + * seconds. 
+ * + * + * What: /sys/fs/orangefs/perf_history_size + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * The perf_counters cache statistics have N, or + * perf_history_size, samples. The default is + * one. + * + * Every perf_time_interval_secs the (first) + * samples are reset. + * + * If N is greater than one, the "current" set + * of samples is reset, and the samples from the + * other N-1 intervals remain available. + * + * + * What: /sys/fs/orangefs/op_timeout_secs + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Service operation timeout in seconds. + * + * + * What: /sys/fs/orangefs/slot_timeout_secs + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * "Slot" timeout in seconds. A "slot" + * is an indexed buffer in the shared + * memory segment used for communication + * between the kernel module and userspace. + * Slots are requested and waited for, + * the wait times out after slot_timeout_secs. + * + * + * What: /sys/fs/orangefs/acache/... + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Attribute cache configurable settings. + * + * + * What: /sys/fs/orangefs/ncache/... + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Name cache configurable settings. + * + * + * What: /sys/fs/orangefs/capcache/... + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Capability cache configurable settings. + * + * + * What: /sys/fs/orangefs/ccache/... + * Date: Jun 2015 + * Contact: Mike Marshall + * Description: + * Credential cache configurable settings. 
+ * + */ + +#include +#include +#include +#include +#include +#include + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-sysfs.h" + +#define ORANGEFS_KOBJ_ID "orangefs" +#define ACACHE_KOBJ_ID "acache" +#define CAPCACHE_KOBJ_ID "capcache" +#define CCACHE_KOBJ_ID "ccache" +#define NCACHE_KOBJ_ID "ncache" +#define PC_KOBJ_ID "pc" +#define STATS_KOBJ_ID "stats" + +struct orangefs_obj { + struct kobject kobj; + int op_timeout_secs; + int perf_counter_reset; + int perf_history_size; + int perf_time_interval_secs; + int slot_timeout_secs; +}; + +struct acache_orangefs_obj { + struct kobject kobj; + int hard_limit; + int reclaim_percentage; + int soft_limit; + int timeout_msecs; +}; + +struct capcache_orangefs_obj { + struct kobject kobj; + int hard_limit; + int reclaim_percentage; + int soft_limit; + int timeout_secs; +}; + +struct ccache_orangefs_obj { + struct kobject kobj; + int hard_limit; + int reclaim_percentage; + int soft_limit; + int timeout_secs; +}; + +struct ncache_orangefs_obj { + struct kobject kobj; + int hard_limit; + int reclaim_percentage; + int soft_limit; + int timeout_msecs; +}; + +struct pc_orangefs_obj { + struct kobject kobj; + char *acache; + char *capcache; + char *ncache; +}; + +struct stats_orangefs_obj { + struct kobject kobj; + int reads; + int writes; +}; + +struct orangefs_attribute { + struct attribute attr; + ssize_t (*show)(struct orangefs_obj *orangefs_obj, + struct orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct orangefs_obj *orangefs_obj, + struct orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +struct acache_orangefs_attribute { + struct attribute attr; + ssize_t (*show)(struct acache_orangefs_obj *acache_orangefs_obj, + struct acache_orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct acache_orangefs_obj *acache_orangefs_obj, + struct acache_orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +struct capcache_orangefs_attribute { + struct 
attribute attr; + ssize_t (*show)(struct capcache_orangefs_obj *capcache_orangefs_obj, + struct capcache_orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct capcache_orangefs_obj *capcache_orangefs_obj, + struct capcache_orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +struct ccache_orangefs_attribute { + struct attribute attr; + ssize_t (*show)(struct ccache_orangefs_obj *ccache_orangefs_obj, + struct ccache_orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct ccache_orangefs_obj *ccache_orangefs_obj, + struct ccache_orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +struct ncache_orangefs_attribute { + struct attribute attr; + ssize_t (*show)(struct ncache_orangefs_obj *ncache_orangefs_obj, + struct ncache_orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct ncache_orangefs_obj *ncache_orangefs_obj, + struct ncache_orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +struct pc_orangefs_attribute { + struct attribute attr; + ssize_t (*show)(struct pc_orangefs_obj *pc_orangefs_obj, + struct pc_orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct pc_orangefs_obj *pc_orangefs_obj, + struct pc_orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +struct stats_orangefs_attribute { + struct attribute attr; + ssize_t (*show)(struct stats_orangefs_obj *stats_orangefs_obj, + struct stats_orangefs_attribute *attr, + char *buf); + ssize_t (*store)(struct stats_orangefs_obj *stats_orangefs_obj, + struct stats_orangefs_attribute *attr, + const char *buf, + size_t count); +}; + +static ssize_t orangefs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct orangefs_attribute *attribute; + struct orangefs_obj *orangefs_obj; + int rc; + + attribute = container_of(attr, struct orangefs_attribute, attr); + orangefs_obj = container_of(kobj, struct orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = 
attribute->show(orangefs_obj, attribute, buf); + +out: + return rc; +} + +static ssize_t orangefs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t len) +{ + struct orangefs_attribute *attribute; + struct orangefs_obj *orangefs_obj; + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "orangefs_attr_store: start\n"); + + attribute = container_of(attr, struct orangefs_attribute, attr); + orangefs_obj = container_of(kobj, struct orangefs_obj, kobj); + + if (!attribute->store) { + rc = -EIO; + goto out; + } + + rc = attribute->store(orangefs_obj, attribute, buf, len); + +out: + return rc; +} + +static const struct sysfs_ops orangefs_sysfs_ops = { + .show = orangefs_attr_show, + .store = orangefs_attr_store, +}; + +static ssize_t acache_orangefs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct acache_orangefs_attribute *attribute; + struct acache_orangefs_obj *acache_orangefs_obj; + int rc; + + attribute = container_of(attr, struct acache_orangefs_attribute, attr); + acache_orangefs_obj = + container_of(kobj, struct acache_orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = attribute->show(acache_orangefs_obj, attribute, buf); + +out: + return rc; +} + +static ssize_t acache_orangefs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t len) +{ + struct acache_orangefs_attribute *attribute; + struct acache_orangefs_obj *acache_orangefs_obj; + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "acache_orangefs_attr_store: start\n"); + + attribute = container_of(attr, struct acache_orangefs_attribute, attr); + acache_orangefs_obj = + container_of(kobj, struct acache_orangefs_obj, kobj); + + if (!attribute->store) { + rc = -EIO; + goto out; + } + + rc = attribute->store(acache_orangefs_obj, attribute, buf, len); + +out: + return rc; +} + +static const struct sysfs_ops acache_orangefs_sysfs_ops = { + .show = acache_orangefs_attr_show, + .store 
= acache_orangefs_attr_store, +}; + +static ssize_t capcache_orangefs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct capcache_orangefs_attribute *attribute; + struct capcache_orangefs_obj *capcache_orangefs_obj; + int rc; + + attribute = + container_of(attr, struct capcache_orangefs_attribute, attr); + capcache_orangefs_obj = + container_of(kobj, struct capcache_orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = attribute->show(capcache_orangefs_obj, attribute, buf); + +out: + return rc; +} + +static ssize_t capcache_orangefs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t len) +{ + struct capcache_orangefs_attribute *attribute; + struct capcache_orangefs_obj *capcache_orangefs_obj; + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "capcache_orangefs_attr_store: start\n"); + + attribute = + container_of(attr, struct capcache_orangefs_attribute, attr); + capcache_orangefs_obj = + container_of(kobj, struct capcache_orangefs_obj, kobj); + + if (!attribute->store) { + rc = -EIO; + goto out; + } + + rc = attribute->store(capcache_orangefs_obj, attribute, buf, len); + +out: + return rc; +} + +static const struct sysfs_ops capcache_orangefs_sysfs_ops = { + .show = capcache_orangefs_attr_show, + .store = capcache_orangefs_attr_store, +}; + +static ssize_t ccache_orangefs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ccache_orangefs_attribute *attribute; + struct ccache_orangefs_obj *ccache_orangefs_obj; + int rc; + + attribute = + container_of(attr, struct ccache_orangefs_attribute, attr); + ccache_orangefs_obj = + container_of(kobj, struct ccache_orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = attribute->show(ccache_orangefs_obj, attribute, buf); + +out: + return rc; +} + +static ssize_t ccache_orangefs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t 
len) +{ + struct ccache_orangefs_attribute *attribute; + struct ccache_orangefs_obj *ccache_orangefs_obj; + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "ccache_orangefs_attr_store: start\n"); + + attribute = + container_of(attr, struct ccache_orangefs_attribute, attr); + ccache_orangefs_obj = + container_of(kobj, struct ccache_orangefs_obj, kobj); + + if (!attribute->store) { + rc = -EIO; + goto out; + } + + rc = attribute->store(ccache_orangefs_obj, attribute, buf, len); + +out: + return rc; +} + +static const struct sysfs_ops ccache_orangefs_sysfs_ops = { + .show = ccache_orangefs_attr_show, + .store = ccache_orangefs_attr_store, +}; + +static ssize_t ncache_orangefs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ncache_orangefs_attribute *attribute; + struct ncache_orangefs_obj *ncache_orangefs_obj; + int rc; + + attribute = container_of(attr, struct ncache_orangefs_attribute, attr); + ncache_orangefs_obj = + container_of(kobj, struct ncache_orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = attribute->show(ncache_orangefs_obj, attribute, buf); + +out: + return rc; +} + +static ssize_t ncache_orangefs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t len) +{ + struct ncache_orangefs_attribute *attribute; + struct ncache_orangefs_obj *ncache_orangefs_obj; + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "ncache_orangefs_attr_store: start\n"); + + attribute = container_of(attr, struct ncache_orangefs_attribute, attr); + ncache_orangefs_obj = + container_of(kobj, struct ncache_orangefs_obj, kobj); + + if (!attribute->store) { + rc = -EIO; + goto out; + } + + rc = attribute->store(ncache_orangefs_obj, attribute, buf, len); + +out: + return rc; +} + +static const struct sysfs_ops ncache_orangefs_sysfs_ops = { + .show = ncache_orangefs_attr_show, + .store = ncache_orangefs_attr_store, +}; + +static ssize_t pc_orangefs_attr_show(struct kobject *kobj, + struct 
attribute *attr, + char *buf) +{ + struct pc_orangefs_attribute *attribute; + struct pc_orangefs_obj *pc_orangefs_obj; + int rc; + + attribute = container_of(attr, struct pc_orangefs_attribute, attr); + pc_orangefs_obj = + container_of(kobj, struct pc_orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = attribute->show(pc_orangefs_obj, attribute, buf); + +out: + return rc; +} + +static const struct sysfs_ops pc_orangefs_sysfs_ops = { + .show = pc_orangefs_attr_show, +}; + +static ssize_t stats_orangefs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct stats_orangefs_attribute *attribute; + struct stats_orangefs_obj *stats_orangefs_obj; + int rc; + + attribute = container_of(attr, struct stats_orangefs_attribute, attr); + stats_orangefs_obj = + container_of(kobj, struct stats_orangefs_obj, kobj); + + if (!attribute->show) { + rc = -EIO; + goto out; + } + + rc = attribute->show(stats_orangefs_obj, attribute, buf); + +out: + return rc; +} + +static const struct sysfs_ops stats_orangefs_sysfs_ops = { + .show = stats_orangefs_attr_show, +}; + +static void orangefs_release(struct kobject *kobj) +{ + struct orangefs_obj *orangefs_obj; + + orangefs_obj = container_of(kobj, struct orangefs_obj, kobj); + kfree(orangefs_obj); +} + +static void acache_orangefs_release(struct kobject *kobj) +{ + struct acache_orangefs_obj *acache_orangefs_obj; + + acache_orangefs_obj = + container_of(kobj, struct acache_orangefs_obj, kobj); + kfree(acache_orangefs_obj); +} + +static void capcache_orangefs_release(struct kobject *kobj) +{ + struct capcache_orangefs_obj *capcache_orangefs_obj; + + capcache_orangefs_obj = + container_of(kobj, struct capcache_orangefs_obj, kobj); + kfree(capcache_orangefs_obj); +} + +static void ccache_orangefs_release(struct kobject *kobj) +{ + struct ccache_orangefs_obj *ccache_orangefs_obj; + + ccache_orangefs_obj = + container_of(kobj, struct ccache_orangefs_obj, kobj); + 
kfree(ccache_orangefs_obj); +} + +static void ncache_orangefs_release(struct kobject *kobj) +{ + struct ncache_orangefs_obj *ncache_orangefs_obj; + + ncache_orangefs_obj = + container_of(kobj, struct ncache_orangefs_obj, kobj); + kfree(ncache_orangefs_obj); +} + +static void pc_orangefs_release(struct kobject *kobj) +{ + struct pc_orangefs_obj *pc_orangefs_obj; + + pc_orangefs_obj = + container_of(kobj, struct pc_orangefs_obj, kobj); + kfree(pc_orangefs_obj); +} + +static void stats_orangefs_release(struct kobject *kobj) +{ + struct stats_orangefs_obj *stats_orangefs_obj; + + stats_orangefs_obj = + container_of(kobj, struct stats_orangefs_obj, kobj); + kfree(stats_orangefs_obj); +} + +static ssize_t sysfs_int_show(char *kobj_id, char *buf, void *attr) +{ + int rc = -EIO; + struct orangefs_attribute *orangefs_attr; + struct stats_orangefs_attribute *stats_orangefs_attr; + + gossip_debug(GOSSIP_SYSFS_DEBUG, "sysfs_int_show: id:%s:\n", kobj_id); + + if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) { + orangefs_attr = (struct orangefs_attribute *)attr; + + if (!strcmp(orangefs_attr->attr.name, "op_timeout_secs")) { + rc = scnprintf(buf, + PAGE_SIZE, + "%d\n", + op_timeout_secs); + goto out; + } else if (!strcmp(orangefs_attr->attr.name, + "slot_timeout_secs")) { + rc = scnprintf(buf, + PAGE_SIZE, + "%d\n", + slot_timeout_secs); + goto out; + } else { + goto out; + } + + } else if (!strcmp(kobj_id, STATS_KOBJ_ID)) { + stats_orangefs_attr = (struct stats_orangefs_attribute *)attr; + + if (!strcmp(stats_orangefs_attr->attr.name, "reads")) { + rc = scnprintf(buf, + PAGE_SIZE, + "%lu\n", + g_pvfs2_stats.reads); + goto out; + } else if (!strcmp(stats_orangefs_attr->attr.name, "writes")) { + rc = scnprintf(buf, + PAGE_SIZE, + "%lu\n", + g_pvfs2_stats.writes); + goto out; + } else { + goto out; + } + } + +out: + + return rc; +} + +static ssize_t int_orangefs_show(struct orangefs_obj *orangefs_obj, + struct orangefs_attribute *attr, + char *buf) +{ + int rc; + + 
gossip_debug(GOSSIP_SYSFS_DEBUG, + "int_orangefs_show:start attr->attr.name:%s:\n", + attr->attr.name); + + rc = sysfs_int_show(ORANGEFS_KOBJ_ID, buf, (void *) attr); + + return rc; +} + +static ssize_t int_stats_show(struct stats_orangefs_obj *stats_orangefs_obj, + struct stats_orangefs_attribute *attr, + char *buf) +{ + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "int_stats_show:start attr->attr.name:%s:\n", + attr->attr.name); + + rc = sysfs_int_show(STATS_KOBJ_ID, buf, (void *) attr); + + return rc; +} + +static ssize_t int_store(struct orangefs_obj *orangefs_obj, + struct orangefs_attribute *attr, + const char *buf, + size_t count) +{ + int rc = 0; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "int_store: start attr->attr.name:%s: buf:%s:\n", + attr->attr.name, buf); + + if (!strcmp(attr->attr.name, "op_timeout_secs")) { + rc = kstrtoint(buf, 0, &op_timeout_secs); + goto out; + } else if (!strcmp(attr->attr.name, "slot_timeout_secs")) { + rc = kstrtoint(buf, 0, &slot_timeout_secs); + goto out; + } else { + goto out; + } + +out: + if (rc) + rc = -EINVAL; + else + rc = count; + + return rc; +} + +/* + * obtain attribute values from userspace with a service operation. 
+ */ +int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) +{ + struct pvfs2_kernel_op_s *new_op = NULL; + int rc = 0; + char *ser_op_type = NULL; + struct orangefs_attribute *orangefs_attr; + struct acache_orangefs_attribute *acache_attr; + struct capcache_orangefs_attribute *capcache_attr; + struct ccache_orangefs_attribute *ccache_attr; + struct ncache_orangefs_attribute *ncache_attr; + struct pc_orangefs_attribute *pc_attr; + __u32 op_alloc_type; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "sysfs_service_op_show: id:%s:\n", + kobj_id); + + if (strcmp(kobj_id, PC_KOBJ_ID)) + op_alloc_type = PVFS2_VFS_OP_PARAM; + else + op_alloc_type = PVFS2_VFS_OP_PERF_COUNT; + + new_op = op_alloc(op_alloc_type); + if (!new_op) { + rc = -ENOMEM; + goto out; + } + + /* Can't do a service_operation if the client is not running... */ + rc = is_daemon_in_service(); + if (rc) { + pr_info("%s: Client not running :%d:\n", + __func__, + is_daemon_in_service()); + goto out; + } + + if (strcmp(kobj_id, PC_KOBJ_ID)) + new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_GET; + + if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) { + orangefs_attr = (struct orangefs_attribute *)attr; + + if (!strcmp(orangefs_attr->attr.name, "perf_history_size")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE; + else if (!strcmp(orangefs_attr->attr.name, + "perf_time_interval_secs")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS; + else if (!strcmp(orangefs_attr->attr.name, + "perf_counter_reset")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_PERF_RESET; + + } else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) { + acache_attr = (struct acache_orangefs_attribute *)attr; + + if (!strcmp(acache_attr->attr.name, "timeout_msecs")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS; + + if (!strcmp(acache_attr->attr.name, "hard_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT; + + if 
(!strcmp(acache_attr->attr.name, "soft_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT; + + if (!strcmp(acache_attr->attr.name, "reclaim_percentage")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE; + + } else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) { + capcache_attr = (struct capcache_orangefs_attribute *)attr; + + if (!strcmp(capcache_attr->attr.name, "timeout_secs")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS; + + if (!strcmp(capcache_attr->attr.name, "hard_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT; + + if (!strcmp(capcache_attr->attr.name, "soft_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT; + + if (!strcmp(capcache_attr->attr.name, "reclaim_percentage")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE; + + } else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) { + ccache_attr = (struct ccache_orangefs_attribute *)attr; + + if (!strcmp(ccache_attr->attr.name, "timeout_secs")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS; + + if (!strcmp(ccache_attr->attr.name, "hard_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT; + + if (!strcmp(ccache_attr->attr.name, "soft_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT; + + if (!strcmp(ccache_attr->attr.name, "reclaim_percentage")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE; + + } else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) { + ncache_attr = (struct ncache_orangefs_attribute *)attr; + + if (!strcmp(ncache_attr->attr.name, "timeout_msecs")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS; + + if (!strcmp(ncache_attr->attr.name, "hard_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT; + + if 
(!strcmp(ncache_attr->attr.name, "soft_limit")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT; + + if (!strcmp(ncache_attr->attr.name, "reclaim_percentage")) + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE; + + } else if (!strcmp(kobj_id, PC_KOBJ_ID)) { + pc_attr = (struct pc_orangefs_attribute *)attr; + + if (!strcmp(pc_attr->attr.name, ACACHE_KOBJ_ID)) + new_op->upcall.req.perf_count.type = + PVFS2_PERF_COUNT_REQUEST_ACACHE; + + if (!strcmp(pc_attr->attr.name, CAPCACHE_KOBJ_ID)) + new_op->upcall.req.perf_count.type = + PVFS2_PERF_COUNT_REQUEST_CAPCACHE; + + if (!strcmp(pc_attr->attr.name, NCACHE_KOBJ_ID)) + new_op->upcall.req.perf_count.type = + PVFS2_PERF_COUNT_REQUEST_NCACHE; + + } else { + gossip_err("sysfs_service_op_show: unknown kobj_id:%s:\n", + kobj_id); + rc = -EINVAL; + goto out; + } + + + if (strcmp(kobj_id, PC_KOBJ_ID)) + ser_op_type = "pvfs2_param"; + else + ser_op_type = "pvfs2_perf_count"; + + /* + * The service_operation will return an errno return code on + * error, and zero on success. + */ + rc = service_operation(new_op, ser_op_type, PVFS2_OP_INTERRUPTIBLE); + +out: + if (!rc) { + if (strcmp(kobj_id, PC_KOBJ_ID)) { + rc = scnprintf(buf, + PAGE_SIZE, + "%d\n", + (int)new_op->downcall.resp.param.value); + } else { + rc = scnprintf( + buf, + PAGE_SIZE, + "%s", + new_op->downcall.resp.perf_count.buffer); + } + } + + /* + * if we got ENOMEM, then op_alloc probably failed... 
+ */ + if (rc != -ENOMEM) + op_release(new_op); + + return rc; + +} + +static ssize_t service_orangefs_show(struct orangefs_obj *orangefs_obj, + struct orangefs_attribute *attr, + char *buf) +{ + int rc = 0; + + rc = sysfs_service_op_show(ORANGEFS_KOBJ_ID, buf, (void *)attr); + + return rc; +} + +static ssize_t + service_acache_show(struct acache_orangefs_obj *acache_orangefs_obj, + struct acache_orangefs_attribute *attr, + char *buf) +{ + int rc = 0; + + rc = sysfs_service_op_show(ACACHE_KOBJ_ID, buf, (void *)attr); + + return rc; +} + +static ssize_t service_capcache_show(struct capcache_orangefs_obj + *capcache_orangefs_obj, + struct capcache_orangefs_attribute *attr, + char *buf) +{ + int rc = 0; + + rc = sysfs_service_op_show(CAPCACHE_KOBJ_ID, buf, (void *)attr); + + return rc; +} + +static ssize_t service_ccache_show(struct ccache_orangefs_obj + *ccache_orangefs_obj, + struct ccache_orangefs_attribute *attr, + char *buf) +{ + int rc = 0; + + rc = sysfs_service_op_show(CCACHE_KOBJ_ID, buf, (void *)attr); + + return rc; +} + +static ssize_t + service_ncache_show(struct ncache_orangefs_obj *ncache_orangefs_obj, + struct ncache_orangefs_attribute *attr, + char *buf) +{ + int rc = 0; + + rc = sysfs_service_op_show(NCACHE_KOBJ_ID, buf, (void *)attr); + + return rc; +} + +static ssize_t + service_pc_show(struct pc_orangefs_obj *pc_orangefs_obj, + struct pc_orangefs_attribute *attr, + char *buf) +{ + int rc = 0; + + rc = sysfs_service_op_show(PC_KOBJ_ID, buf, (void *)attr); + + return rc; +} + +/* + * pass attribute values back to userspace with a service operation. + * + * We have to do a memory allocation, an sscanf and a service operation. + * And we have to evaluate what the user entered, to make sure the + * value is within the range supported by the attribute. So, there's + * a lot of return code checking and mapping going on here. + * + * We want to return 1 if we think everything went OK, and + * EINVAL if not. 
+ */ +int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) +{ + struct pvfs2_kernel_op_s *new_op = NULL; + int val = 0; + int rc = 0; + struct orangefs_attribute *orangefs_attr; + struct acache_orangefs_attribute *acache_attr; + struct capcache_orangefs_attribute *capcache_attr; + struct ccache_orangefs_attribute *ccache_attr; + struct ncache_orangefs_attribute *ncache_attr; + + gossip_debug(GOSSIP_SYSFS_DEBUG, + "sysfs_service_op_store: id:%s:\n", + kobj_id); + + new_op = op_alloc(PVFS2_VFS_OP_PARAM); + if (!new_op) { + rc = -ENOMEM; + goto out; + } + + /* Can't do a service_operation if the client is not running... */ + rc = is_daemon_in_service(); + if (rc) { + pr_info("%s: Client not running :%d:\n", + __func__, + is_daemon_in_service()); + goto out; + } + + /* + * The value we want to send back to userspace is in buf. + */ + rc = kstrtoint(buf, 0, &val); + if (rc) + goto out; + + if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) { + orangefs_attr = (struct orangefs_attribute *)attr; + + if (!strcmp(orangefs_attr->attr.name, "perf_history_size")) { + if (val > 0) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(orangefs_attr->attr.name, + "perf_time_interval_secs")) { + if (val > 0) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(orangefs_attr->attr.name, + "perf_counter_reset")) { + if ((val == 0) || (val == 1)) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_PERF_RESET; + } else { + rc = 0; + goto out; + } + } + + } else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) { + acache_attr = (struct acache_orangefs_attribute *)attr; + + if (!strcmp(acache_attr->attr.name, "hard_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(acache_attr->attr.name, "soft_limit")) { 
+ if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(acache_attr->attr.name, + "reclaim_percentage")) { + if ((val > -1) && (val < 101)) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(acache_attr->attr.name, "timeout_msecs")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS; + } else { + rc = 0; + goto out; + } + } + + } else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) { + capcache_attr = (struct capcache_orangefs_attribute *)attr; + + if (!strcmp(capcache_attr->attr.name, "hard_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(capcache_attr->attr.name, "soft_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(capcache_attr->attr.name, + "reclaim_percentage")) { + if ((val > -1) && (val < 101)) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(capcache_attr->attr.name, "timeout_secs")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS; + } else { + rc = 0; + goto out; + } + } + + } else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) { + ccache_attr = (struct ccache_orangefs_attribute *)attr; + + if (!strcmp(ccache_attr->attr.name, "hard_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(ccache_attr->attr.name, "soft_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT; + } else { + rc = 0; + goto out; + } + } 
else if (!strcmp(ccache_attr->attr.name, + "reclaim_percentage")) { + if ((val > -1) && (val < 101)) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(ccache_attr->attr.name, "timeout_secs")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS; + } else { + rc = 0; + goto out; + } + } + + } else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) { + ncache_attr = (struct ncache_orangefs_attribute *)attr; + + if (!strcmp(ncache_attr->attr.name, "hard_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(ncache_attr->attr.name, "soft_limit")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(ncache_attr->attr.name, + "reclaim_percentage")) { + if ((val > -1) && (val < 101)) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE; + } else { + rc = 0; + goto out; + } + } else if (!strcmp(ncache_attr->attr.name, "timeout_msecs")) { + if (val > -1) { + new_op->upcall.req.param.op = + PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS; + } else { + rc = 0; + goto out; + } + } + + } else { + gossip_err("sysfs_service_op_store: unknown kobj_id:%s:\n", + kobj_id); + rc = -EINVAL; + goto out; + } + + new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET; + + new_op->upcall.req.param.value = val; + + /* + * The service_operation will return a errno return code on + * error, and zero on success. + */ + rc = service_operation(new_op, "pvfs2_param", PVFS2_OP_INTERRUPTIBLE); + + if (rc < 0) { + gossip_err("sysfs_service_op_store: service op returned:%d:\n", + rc); + rc = 0; + } else { + rc = 1; + } + +out: + /* + * if we got ENOMEM, then op_alloc probably failed... 
+ */ + if (rc == -ENOMEM) + rc = 0; + else + op_release(new_op); + + if (rc == 0) + rc = -EINVAL; + + return rc; +} + +static ssize_t + service_orangefs_store(struct orangefs_obj *orangefs_obj, + struct orangefs_attribute *attr, + const char *buf, + size_t count) +{ + int rc = 0; + + rc = sysfs_service_op_store(ORANGEFS_KOBJ_ID, buf, (void *) attr); + + /* rc should have an errno value if the service_op went bad. */ + if (rc == 1) + rc = count; + + return rc; +} + +static ssize_t + service_acache_store(struct acache_orangefs_obj *acache_orangefs_obj, + struct acache_orangefs_attribute *attr, + const char *buf, + size_t count) +{ + int rc = 0; + + rc = sysfs_service_op_store(ACACHE_KOBJ_ID, buf, (void *) attr); + + /* rc should have an errno value if the service_op went bad. */ + if (rc == 1) + rc = count; + + return rc; +} + +static ssize_t + service_capcache_store(struct capcache_orangefs_obj + *capcache_orangefs_obj, + struct capcache_orangefs_attribute *attr, + const char *buf, + size_t count) +{ + int rc = 0; + + rc = sysfs_service_op_store(CAPCACHE_KOBJ_ID, buf, (void *) attr); + + /* rc should have an errno value if the service_op went bad. */ + if (rc == 1) + rc = count; + + return rc; +} + +static ssize_t service_ccache_store(struct ccache_orangefs_obj + *ccache_orangefs_obj, + struct ccache_orangefs_attribute *attr, + const char *buf, + size_t count) +{ + int rc = 0; + + rc = sysfs_service_op_store(CCACHE_KOBJ_ID, buf, (void *) attr); + + /* rc should have an errno value if the service_op went bad. */ + if (rc == 1) + rc = count; + + return rc; +} + +static ssize_t + service_ncache_store(struct ncache_orangefs_obj *ncache_orangefs_obj, + struct ncache_orangefs_attribute *attr, + const char *buf, + size_t count) +{ + int rc = 0; + + rc = sysfs_service_op_store(NCACHE_KOBJ_ID, buf, (void *) attr); + + /* rc should have an errno value if the service_op went bad. 
*/ + if (rc == 1) + rc = count; + + return rc; +} + +static struct orangefs_attribute op_timeout_secs_attribute = + __ATTR(op_timeout_secs, 0664, int_orangefs_show, int_store); + +static struct orangefs_attribute slot_timeout_secs_attribute = + __ATTR(slot_timeout_secs, 0664, int_orangefs_show, int_store); + +static struct orangefs_attribute perf_counter_reset_attribute = + __ATTR(perf_counter_reset, + 0664, + service_orangefs_show, + service_orangefs_store); + +static struct orangefs_attribute perf_history_size_attribute = + __ATTR(perf_history_size, + 0664, + service_orangefs_show, + service_orangefs_store); + +static struct orangefs_attribute perf_time_interval_secs_attribute = + __ATTR(perf_time_interval_secs, + 0664, + service_orangefs_show, + service_orangefs_store); + +static struct attribute *orangefs_default_attrs[] = { + &op_timeout_secs_attribute.attr, + &slot_timeout_secs_attribute.attr, + &perf_counter_reset_attribute.attr, + &perf_history_size_attribute.attr, + &perf_time_interval_secs_attribute.attr, + NULL, +}; + +static struct kobj_type orangefs_ktype = { + .sysfs_ops = &orangefs_sysfs_ops, + .release = orangefs_release, + .default_attrs = orangefs_default_attrs, +}; + +static struct acache_orangefs_attribute acache_hard_limit_attribute = + __ATTR(hard_limit, + 0664, + service_acache_show, + service_acache_store); + +static struct acache_orangefs_attribute acache_reclaim_percent_attribute = + __ATTR(reclaim_percentage, + 0664, + service_acache_show, + service_acache_store); + +static struct acache_orangefs_attribute acache_soft_limit_attribute = + __ATTR(soft_limit, + 0664, + service_acache_show, + service_acache_store); + +static struct acache_orangefs_attribute acache_timeout_msecs_attribute = + __ATTR(timeout_msecs, + 0664, + service_acache_show, + service_acache_store); + +static struct attribute *acache_orangefs_default_attrs[] = { + &acache_hard_limit_attribute.attr, + &acache_reclaim_percent_attribute.attr, + 
&acache_soft_limit_attribute.attr, + &acache_timeout_msecs_attribute.attr, + NULL, +}; + +static struct kobj_type acache_orangefs_ktype = { + .sysfs_ops = &acache_orangefs_sysfs_ops, + .release = acache_orangefs_release, + .default_attrs = acache_orangefs_default_attrs, +}; + +static struct capcache_orangefs_attribute capcache_hard_limit_attribute = + __ATTR(hard_limit, + 0664, + service_capcache_show, + service_capcache_store); + +static struct capcache_orangefs_attribute capcache_reclaim_percent_attribute = + __ATTR(reclaim_percentage, + 0664, + service_capcache_show, + service_capcache_store); + +static struct capcache_orangefs_attribute capcache_soft_limit_attribute = + __ATTR(soft_limit, + 0664, + service_capcache_show, + service_capcache_store); + +static struct capcache_orangefs_attribute capcache_timeout_secs_attribute = + __ATTR(timeout_secs, + 0664, + service_capcache_show, + service_capcache_store); + +static struct attribute *capcache_orangefs_default_attrs[] = { + &capcache_hard_limit_attribute.attr, + &capcache_reclaim_percent_attribute.attr, + &capcache_soft_limit_attribute.attr, + &capcache_timeout_secs_attribute.attr, + NULL, +}; + +static struct kobj_type capcache_orangefs_ktype = { + .sysfs_ops = &capcache_orangefs_sysfs_ops, + .release = capcache_orangefs_release, + .default_attrs = capcache_orangefs_default_attrs, +}; + +static struct ccache_orangefs_attribute ccache_hard_limit_attribute = + __ATTR(hard_limit, + 0664, + service_ccache_show, + service_ccache_store); + +static struct ccache_orangefs_attribute ccache_reclaim_percent_attribute = + __ATTR(reclaim_percentage, + 0664, + service_ccache_show, + service_ccache_store); + +static struct ccache_orangefs_attribute ccache_soft_limit_attribute = + __ATTR(soft_limit, + 0664, + service_ccache_show, + service_ccache_store); + +static struct ccache_orangefs_attribute ccache_timeout_secs_attribute = + __ATTR(timeout_secs, + 0664, + service_ccache_show, + service_ccache_store); + +static struct 
attribute *ccache_orangefs_default_attrs[] = { + &ccache_hard_limit_attribute.attr, + &ccache_reclaim_percent_attribute.attr, + &ccache_soft_limit_attribute.attr, + &ccache_timeout_secs_attribute.attr, + NULL, +}; + +static struct kobj_type ccache_orangefs_ktype = { + .sysfs_ops = &ccache_orangefs_sysfs_ops, + .release = ccache_orangefs_release, + .default_attrs = ccache_orangefs_default_attrs, +}; + +static struct ncache_orangefs_attribute ncache_hard_limit_attribute = + __ATTR(hard_limit, + 0664, + service_ncache_show, + service_ncache_store); + +static struct ncache_orangefs_attribute ncache_reclaim_percent_attribute = + __ATTR(reclaim_percentage, + 0664, + service_ncache_show, + service_ncache_store); + +static struct ncache_orangefs_attribute ncache_soft_limit_attribute = + __ATTR(soft_limit, + 0664, + service_ncache_show, + service_ncache_store); + +static struct ncache_orangefs_attribute ncache_timeout_msecs_attribute = + __ATTR(timeout_msecs, + 0664, + service_ncache_show, + service_ncache_store); + +static struct attribute *ncache_orangefs_default_attrs[] = { + &ncache_hard_limit_attribute.attr, + &ncache_reclaim_percent_attribute.attr, + &ncache_soft_limit_attribute.attr, + &ncache_timeout_msecs_attribute.attr, + NULL, +}; + +static struct kobj_type ncache_orangefs_ktype = { + .sysfs_ops = &ncache_orangefs_sysfs_ops, + .release = ncache_orangefs_release, + .default_attrs = ncache_orangefs_default_attrs, +}; + +static struct pc_orangefs_attribute pc_acache_attribute = + __ATTR(acache, + 0664, + service_pc_show, + NULL); + +static struct pc_orangefs_attribute pc_capcache_attribute = + __ATTR(capcache, + 0664, + service_pc_show, + NULL); + +static struct pc_orangefs_attribute pc_ncache_attribute = + __ATTR(ncache, + 0664, + service_pc_show, + NULL); + +static struct attribute *pc_orangefs_default_attrs[] = { + &pc_acache_attribute.attr, + &pc_capcache_attribute.attr, + &pc_ncache_attribute.attr, + NULL, +}; + +static struct kobj_type pc_orangefs_ktype = { + 
.sysfs_ops = &pc_orangefs_sysfs_ops, + .release = pc_orangefs_release, + .default_attrs = pc_orangefs_default_attrs, +}; + +static struct stats_orangefs_attribute stats_reads_attribute = + __ATTR(reads, + 0664, + int_stats_show, + NULL); + +static struct stats_orangefs_attribute stats_writes_attribute = + __ATTR(writes, + 0664, + int_stats_show, + NULL); + +static struct attribute *stats_orangefs_default_attrs[] = { + &stats_reads_attribute.attr, + &stats_writes_attribute.attr, + NULL, +}; + +static struct kobj_type stats_orangefs_ktype = { + .sysfs_ops = &stats_orangefs_sysfs_ops, + .release = stats_orangefs_release, + .default_attrs = stats_orangefs_default_attrs, +}; + +static struct orangefs_obj *orangefs_obj; +static struct acache_orangefs_obj *acache_orangefs_obj; +static struct capcache_orangefs_obj *capcache_orangefs_obj; +static struct ccache_orangefs_obj *ccache_orangefs_obj; +static struct ncache_orangefs_obj *ncache_orangefs_obj; +static struct pc_orangefs_obj *pc_orangefs_obj; +static struct stats_orangefs_obj *stats_orangefs_obj; + +int orangefs_sysfs_init(void) +{ + int rc; + + gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_init: start\n"); + + /* create /sys/fs/orangefs. */ + orangefs_obj = kzalloc(sizeof(*orangefs_obj), GFP_KERNEL); + if (!orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&orangefs_obj->kobj, + &orangefs_ktype, + fs_kobj, + ORANGEFS_KOBJ_ID); + + if (rc) { + kobject_put(&orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&orangefs_obj->kobj, KOBJ_ADD); + + /* create /sys/fs/orangefs/acache. 
*/ + acache_orangefs_obj = kzalloc(sizeof(*acache_orangefs_obj), GFP_KERNEL); + if (!acache_orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&acache_orangefs_obj->kobj, + &acache_orangefs_ktype, + &orangefs_obj->kobj, + ACACHE_KOBJ_ID); + + if (rc) { + kobject_put(&acache_orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&acache_orangefs_obj->kobj, KOBJ_ADD); + + /* create /sys/fs/orangefs/capcache. */ + capcache_orangefs_obj = + kzalloc(sizeof(*capcache_orangefs_obj), GFP_KERNEL); + if (!capcache_orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&capcache_orangefs_obj->kobj, + &capcache_orangefs_ktype, + &orangefs_obj->kobj, + CAPCACHE_KOBJ_ID); + if (rc) { + kobject_put(&capcache_orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&capcache_orangefs_obj->kobj, KOBJ_ADD); + + /* create /sys/fs/orangefs/ccache. */ + ccache_orangefs_obj = + kzalloc(sizeof(*ccache_orangefs_obj), GFP_KERNEL); + if (!ccache_orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&ccache_orangefs_obj->kobj, + &ccache_orangefs_ktype, + &orangefs_obj->kobj, + CCACHE_KOBJ_ID); + if (rc) { + kobject_put(&ccache_orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&ccache_orangefs_obj->kobj, KOBJ_ADD); + + /* create /sys/fs/orangefs/ncache. */ + ncache_orangefs_obj = kzalloc(sizeof(*ncache_orangefs_obj), GFP_KERNEL); + if (!ncache_orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&ncache_orangefs_obj->kobj, + &ncache_orangefs_ktype, + &orangefs_obj->kobj, + NCACHE_KOBJ_ID); + + if (rc) { + kobject_put(&ncache_orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&ncache_orangefs_obj->kobj, KOBJ_ADD); + + /* create /sys/fs/orangefs/perf_counters. 
*/ + pc_orangefs_obj = kzalloc(sizeof(*pc_orangefs_obj), GFP_KERNEL); + if (!pc_orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&pc_orangefs_obj->kobj, + &pc_orangefs_ktype, + &orangefs_obj->kobj, + "perf_counters"); + + if (rc) { + kobject_put(&pc_orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&pc_orangefs_obj->kobj, KOBJ_ADD); + + /* create /sys/fs/orangefs/stats. */ + stats_orangefs_obj = kzalloc(sizeof(*stats_orangefs_obj), GFP_KERNEL); + if (!stats_orangefs_obj) { + rc = -EINVAL; + goto out; + } + + rc = kobject_init_and_add(&stats_orangefs_obj->kobj, + &stats_orangefs_ktype, + &orangefs_obj->kobj, + STATS_KOBJ_ID); + + if (rc) { + kobject_put(&stats_orangefs_obj->kobj); + rc = -EINVAL; + goto out; + } + + kobject_uevent(&stats_orangefs_obj->kobj, KOBJ_ADD); +out: + return rc; +} + +void orangefs_sysfs_exit(void) +{ + gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_exit: start\n"); + + kobject_put(&acache_orangefs_obj->kobj); + kobject_put(&capcache_orangefs_obj->kobj); + kobject_put(&ccache_orangefs_obj->kobj); + kobject_put(&ncache_orangefs_obj->kobj); + kobject_put(&pc_orangefs_obj->kobj); + kobject_put(&stats_orangefs_obj->kobj); + + kobject_put(&orangefs_obj->kobj); +} diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c new file mode 100644 index 000000000000..107f425d2e90 --- /dev/null +++ b/fs/orangefs/pvfs2-utils.c @@ -0,0 +1,1128 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. 
+ */ +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-dev-proto.h" +#include "pvfs2-bufmap.h" + +__s32 fsid_of_op(struct pvfs2_kernel_op_s *op) +{ + __s32 fsid = PVFS_FS_ID_NULL; + + if (op) { + switch (op->upcall.type) { + case PVFS2_VFS_OP_FILE_IO: + fsid = op->upcall.req.io.refn.fs_id; + break; + case PVFS2_VFS_OP_LOOKUP: + fsid = op->upcall.req.lookup.parent_refn.fs_id; + break; + case PVFS2_VFS_OP_CREATE: + fsid = op->upcall.req.create.parent_refn.fs_id; + break; + case PVFS2_VFS_OP_GETATTR: + fsid = op->upcall.req.getattr.refn.fs_id; + break; + case PVFS2_VFS_OP_REMOVE: + fsid = op->upcall.req.remove.parent_refn.fs_id; + break; + case PVFS2_VFS_OP_MKDIR: + fsid = op->upcall.req.mkdir.parent_refn.fs_id; + break; + case PVFS2_VFS_OP_READDIR: + fsid = op->upcall.req.readdir.refn.fs_id; + break; + case PVFS2_VFS_OP_SETATTR: + fsid = op->upcall.req.setattr.refn.fs_id; + break; + case PVFS2_VFS_OP_SYMLINK: + fsid = op->upcall.req.sym.parent_refn.fs_id; + break; + case PVFS2_VFS_OP_RENAME: + fsid = op->upcall.req.rename.old_parent_refn.fs_id; + break; + case PVFS2_VFS_OP_STATFS: + fsid = op->upcall.req.statfs.fs_id; + break; + case PVFS2_VFS_OP_TRUNCATE: + fsid = op->upcall.req.truncate.refn.fs_id; + break; + case PVFS2_VFS_OP_MMAP_RA_FLUSH: + fsid = op->upcall.req.ra_cache_flush.refn.fs_id; + break; + case PVFS2_VFS_OP_FS_UMOUNT: + fsid = op->upcall.req.fs_umount.fs_id; + break; + case PVFS2_VFS_OP_GETXATTR: + fsid = op->upcall.req.getxattr.refn.fs_id; + break; + case PVFS2_VFS_OP_SETXATTR: + fsid = op->upcall.req.setxattr.refn.fs_id; + break; + case PVFS2_VFS_OP_LISTXATTR: + fsid = op->upcall.req.listxattr.refn.fs_id; + break; + case PVFS2_VFS_OP_REMOVEXATTR: + fsid = op->upcall.req.removexattr.refn.fs_id; + break; + case PVFS2_VFS_OP_FSYNC: + fsid = op->upcall.req.fsync.refn.fs_id; + break; + default: + break; + } + } + return fsid; +} + +static void pvfs2_set_inode_flags(struct inode *inode, + struct PVFS_sys_attr_s *attrs) +{ + if 
(attrs->flags & PVFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + + if (attrs->flags & PVFS_APPEND_FL) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + + if (attrs->flags & PVFS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; + +} + +/* NOTE: symname is ignored unless the inode is a sym link */ +static int copy_attributes_to_inode(struct inode *inode, + struct PVFS_sys_attr_s *attrs, + char *symname) +{ + int ret = -1; + int perm_mode = 0; + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + loff_t inode_size = 0; + loff_t rounded_up_size = 0; + + + /* + arbitrarily set the inode block size; FIXME: we need to + resolve the difference between the reported inode blocksize + and the PAGE_CACHE_SIZE, since our block count will always + be wrong. + + For now, we're setting the block count to be the proper + number assuming the block size is 512 bytes, and the size is + rounded up to the nearest 4K. This is apparently required + to get proper size reports from the 'du' shell utility. + + changing the inode->i_blkbits to something other than + PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that. + */ + gossip_debug(GOSSIP_UTILS_DEBUG, + "attrs->mask = %x (objtype = %s)\n", + attrs->mask, + attrs->objtype == PVFS_TYPE_METAFILE ? "file" : + attrs->objtype == PVFS_TYPE_DIRECTORY ? "directory" : + attrs->objtype == PVFS_TYPE_SYMLINK ? "symlink" : + "invalid/unknown"); + + switch (attrs->objtype) { + case PVFS_TYPE_METAFILE: + pvfs2_set_inode_flags(inode, attrs); + if (attrs->mask & PVFS_ATTR_SYS_SIZE) { + inode_size = (loff_t) attrs->size; + rounded_up_size = + (inode_size + (4096 - (inode_size % 4096))); + + pvfs2_lock_inode(inode); + inode->i_bytes = inode_size; + inode->i_blocks = + (unsigned long)(rounded_up_size / 512); + pvfs2_unlock_inode(inode); + + /* + * NOTE: make sure all the places we're called + * from have the inode->i_sem lock. 
We're fine + * in 99% of the cases since we're mostly + * called from a lookup. + */ + inode->i_size = inode_size; + } + break; + case PVFS_TYPE_SYMLINK: + if (symname != NULL) { + inode->i_size = (loff_t) strlen(symname); + break; + } + /*FALLTHRU*/ + default: + pvfs2_lock_inode(inode); + inode->i_bytes = PAGE_CACHE_SIZE; + inode->i_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512); + pvfs2_unlock_inode(inode); + + inode->i_size = PAGE_CACHE_SIZE; + break; + } + + inode->i_uid = make_kuid(&init_user_ns, attrs->owner); + inode->i_gid = make_kgid(&init_user_ns, attrs->group); + inode->i_atime.tv_sec = (time_t) attrs->atime; + inode->i_mtime.tv_sec = (time_t) attrs->mtime; + inode->i_ctime.tv_sec = (time_t) attrs->ctime; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_nsec = 0; + + if (attrs->perms & PVFS_O_EXECUTE) + perm_mode |= S_IXOTH; + if (attrs->perms & PVFS_O_WRITE) + perm_mode |= S_IWOTH; + if (attrs->perms & PVFS_O_READ) + perm_mode |= S_IROTH; + + if (attrs->perms & PVFS_G_EXECUTE) + perm_mode |= S_IXGRP; + if (attrs->perms & PVFS_G_WRITE) + perm_mode |= S_IWGRP; + if (attrs->perms & PVFS_G_READ) + perm_mode |= S_IRGRP; + + if (attrs->perms & PVFS_U_EXECUTE) + perm_mode |= S_IXUSR; + if (attrs->perms & PVFS_U_WRITE) + perm_mode |= S_IWUSR; + if (attrs->perms & PVFS_U_READ) + perm_mode |= S_IRUSR; + + if (attrs->perms & PVFS_G_SGID) + perm_mode |= S_ISGID; + if (attrs->perms & PVFS_U_SUID) + perm_mode |= S_ISUID; + + inode->i_mode = perm_mode; + + if (is_root_handle(inode)) { + /* special case: mark the root inode as sticky */ + inode->i_mode |= S_ISVTX; + gossip_debug(GOSSIP_UTILS_DEBUG, + "Marking inode %pU as sticky\n", + get_khandle_from_ino(inode)); + } + + switch (attrs->objtype) { + case PVFS_TYPE_METAFILE: + inode->i_mode |= S_IFREG; + ret = 0; + break; + case PVFS_TYPE_DIRECTORY: + inode->i_mode |= S_IFDIR; + /* NOTE: we have no good way to keep nlink consistent + * for directories across clients; keep constant at 1. 
+ * Why 1? If we go with 2, then find(1) gets confused + * and won't work properly withouth the -noleaf option + */ + set_nlink(inode, 1); + ret = 0; + break; + case PVFS_TYPE_SYMLINK: + inode->i_mode |= S_IFLNK; + + /* copy link target to inode private data */ + if (pvfs2_inode && symname) { + strncpy(pvfs2_inode->link_target, + symname, + PVFS_NAME_MAX); + gossip_debug(GOSSIP_UTILS_DEBUG, + "Copied attr link target %s\n", + pvfs2_inode->link_target); + } + gossip_debug(GOSSIP_UTILS_DEBUG, + "symlink mode %o\n", + inode->i_mode); + ret = 0; + break; + default: + gossip_err("pvfs2: copy_attributes_to_inode: got invalid attribute type %x\n", + attrs->objtype); + } + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n", + inode->i_mode, + (unsigned long)i_size_read(inode)); + + return ret; +} + +/* + * NOTE: in kernel land, we never use the sys_attr->link_target for + * anything, so don't bother copying it into the sys_attr object here. + */ +static inline int copy_attributes_from_inode(struct inode *inode, + struct PVFS_sys_attr_s *attrs, + struct iattr *iattr) +{ + umode_t tmp_mode; + + if (!iattr || !inode || !attrs) { + gossip_err("NULL iattr (%p), inode (%p), attrs (%p) " + "in copy_attributes_from_inode!\n", + iattr, + inode, + attrs); + return -EINVAL; + } + /* + * We need to be careful to only copy the attributes out of the + * iattr object that we know are valid. 
+ */ + attrs->mask = 0; + if (iattr->ia_valid & ATTR_UID) { + attrs->owner = from_kuid(current_user_ns(), iattr->ia_uid); + attrs->mask |= PVFS_ATTR_SYS_UID; + gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); + } + if (iattr->ia_valid & ATTR_GID) { + attrs->group = from_kgid(current_user_ns(), iattr->ia_gid); + attrs->mask |= PVFS_ATTR_SYS_GID; + gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); + } + + if (iattr->ia_valid & ATTR_ATIME) { + attrs->mask |= PVFS_ATTR_SYS_ATIME; + if (iattr->ia_valid & ATTR_ATIME_SET) { + attrs->atime = + pvfs2_convert_time_field((void *)&iattr->ia_atime); + attrs->mask |= PVFS_ATTR_SYS_ATIME_SET; + } + } + if (iattr->ia_valid & ATTR_MTIME) { + attrs->mask |= PVFS_ATTR_SYS_MTIME; + if (iattr->ia_valid & ATTR_MTIME_SET) { + attrs->mtime = + pvfs2_convert_time_field((void *)&iattr->ia_mtime); + attrs->mask |= PVFS_ATTR_SYS_MTIME_SET; + } + } + if (iattr->ia_valid & ATTR_CTIME) + attrs->mask |= PVFS_ATTR_SYS_CTIME; + + /* + * PVFS2 cannot set size with a setattr operation. Probably not likely + * to be requested through the VFS, but just in case, don't worry about + * ATTR_SIZE + */ + + if (iattr->ia_valid & ATTR_MODE) { + tmp_mode = iattr->ia_mode; + if (tmp_mode & (S_ISVTX)) { + if (is_root_handle(inode)) { + /* + * allow sticky bit to be set on root (since + * it shows up that way by default anyhow), + * but don't show it to the server + */ + tmp_mode -= S_ISVTX; + } else { + gossip_debug(GOSSIP_UTILS_DEBUG, + "User attempted to set sticky bit on non-root directory; returning EINVAL.\n"); + return -EINVAL; + } + } + + if (tmp_mode & (S_ISUID)) { + gossip_debug(GOSSIP_UTILS_DEBUG, + "Attempting to set setuid bit (not supported); returning EINVAL.\n"); + return -EINVAL; + } + + attrs->perms = PVFS_util_translate_mode(tmp_mode); + attrs->mask |= PVFS_ATTR_SYS_PERM; + } + + return 0; +} + +/* + * issues a pvfs2 getattr request and fills in the appropriate inode + * attributes if successful. 
returns 0 on success; -errno otherwise + */ +int pvfs2_inode_getattr(struct inode *inode, __u32 getattr_mask) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct pvfs2_kernel_op_s *new_op; + int ret = -EINVAL; + + gossip_debug(GOSSIP_UTILS_DEBUG, + "%s: called on inode %pU\n", + __func__, + get_khandle_from_ino(inode)); + + new_op = op_alloc(PVFS2_VFS_OP_GETATTR); + if (!new_op) + return -ENOMEM; + new_op->upcall.req.getattr.refn = pvfs2_inode->refn; + new_op->upcall.req.getattr.mask = getattr_mask; + + ret = service_operation(new_op, __func__, + get_interruptible_flag(inode)); + if (ret != 0) + goto out; + + if (copy_attributes_to_inode(inode, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target)) { + gossip_err("%s: failed to copy attributes\n", __func__); + ret = -ENOENT; + goto out; + } + + /* + * Store blksize in pvfs2 specific part of inode structure; we are + * only going to use this to report to stat to make sure it doesn't + * perturb any inode related code paths. + */ + if (new_op->downcall.resp.getattr.attributes.objtype == + PVFS_TYPE_METAFILE) { + pvfs2_inode->blksize = + new_op->downcall.resp.getattr.attributes.blksize; + } else { + /* mimic behavior of generic_fillattr() for other types. */ + pvfs2_inode->blksize = (1 << inode->i_blkbits); + + } + +out: + gossip_debug(GOSSIP_UTILS_DEBUG, + "Getattr on handle %pU, " + "fsid %d\n (inode ct = %d) returned %d\n", + &pvfs2_inode->refn.khandle, + pvfs2_inode->refn.fs_id, + (int)atomic_read(&inode->i_count), + ret); + + op_release(new_op); + return ret; +} + +/* + * issues a pvfs2 setattr request to make sure the new attribute values + * take effect if successful. 
returns 0 on success; -errno otherwise + */ +int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct pvfs2_kernel_op_s *new_op; + int ret; + + new_op = op_alloc(PVFS2_VFS_OP_SETATTR); + if (!new_op) + return -ENOMEM; + + new_op->upcall.req.setattr.refn = pvfs2_inode->refn; + ret = copy_attributes_from_inode(inode, + &new_op->upcall.req.setattr.attributes, + iattr); + if (ret < 0) { + op_release(new_op); + return ret; + } + + ret = service_operation(new_op, __func__, + get_interruptible_flag(inode)); + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_inode_setattr: returning %d\n", + ret); + + /* when request is serviced properly, free req op struct */ + op_release(new_op); + + /* + * successful setattr should clear the atime, mtime and + * ctime flags. + */ + if (ret == 0) { + ClearAtimeFlag(pvfs2_inode); + ClearMtimeFlag(pvfs2_inode); + ClearCtimeFlag(pvfs2_inode); + ClearModeFlag(pvfs2_inode); + } + + return ret; +} + +int pvfs2_flush_inode(struct inode *inode) +{ + /* + * If it is a dirty inode, this function gets called. + * Gather all the information that needs to be setattr'ed + * Right now, this will only be used for mode, atime, mtime + * and/or ctime. + */ + struct iattr wbattr; + int ret; + int mtime_flag; + int ctime_flag; + int atime_flag; + int mode_flag; + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + + memset(&wbattr, 0, sizeof(wbattr)); + + /* + * check inode flags up front, and clear them if they are set. 
This + * will prevent multiple processes from all trying to flush the same + * inode if they call close() simultaneously + */ + mtime_flag = MtimeFlag(pvfs2_inode); + ClearMtimeFlag(pvfs2_inode); + ctime_flag = CtimeFlag(pvfs2_inode); + ClearCtimeFlag(pvfs2_inode); + atime_flag = AtimeFlag(pvfs2_inode); + ClearAtimeFlag(pvfs2_inode); + mode_flag = ModeFlag(pvfs2_inode); + ClearModeFlag(pvfs2_inode); + + /* -- Lazy atime,mtime and ctime update -- + * Note: all times are dictated by server in the new scheme + * and not by the clients + * + * Also mode updates are being handled now.. + */ + + if (mtime_flag) + wbattr.ia_valid |= ATTR_MTIME; + if (ctime_flag) + wbattr.ia_valid |= ATTR_CTIME; + if (atime_flag) + wbattr.ia_valid |= ATTR_ATIME; + + if (mode_flag) { + wbattr.ia_mode = inode->i_mode; + wbattr.ia_valid |= ATTR_MODE; + } + + gossip_debug(GOSSIP_UTILS_DEBUG, + "*********** pvfs2_flush_inode: %pU " + "(ia_valid %d)\n", + get_khandle_from_ino(inode), + wbattr.ia_valid); + if (wbattr.ia_valid == 0) { + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_flush_inode skipping setattr()\n"); + return 0; + } + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_flush_inode (%pU) writing mode %o\n", + get_khandle_from_ino(inode), + inode->i_mode); + + ret = pvfs2_inode_setattr(inode, &wbattr); + + return ret; +} + +int pvfs2_unmount_sb(struct super_block *sb) +{ + int ret = -EINVAL; + struct pvfs2_kernel_op_s *new_op = NULL; + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_unmount_sb called on sb %p\n", + sb); + + new_op = op_alloc(PVFS2_VFS_OP_FS_UMOUNT); + if (!new_op) + return -ENOMEM; + new_op->upcall.req.fs_umount.id = PVFS2_SB(sb)->id; + new_op->upcall.req.fs_umount.fs_id = PVFS2_SB(sb)->fs_id; + strncpy(new_op->upcall.req.fs_umount.pvfs2_config_server, + PVFS2_SB(sb)->devname, + PVFS_MAX_SERVER_ADDR_LEN); + + gossip_debug(GOSSIP_UTILS_DEBUG, + "Attempting PVFS2 Unmount via host %s\n", + new_op->upcall.req.fs_umount.pvfs2_config_server); + + ret = service_operation(new_op, 
"pvfs2_fs_umount", 0); + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_unmount: got return value of %d\n", ret); + if (ret) + sb = ERR_PTR(ret); + else + PVFS2_SB(sb)->mount_pending = 1; + + op_release(new_op); + return ret; +} + +/* + * NOTE: on successful cancellation, be sure to return -EINTR, as + * that's the return value the caller expects + */ +int pvfs2_cancel_op_in_progress(__u64 tag) +{ + int ret = -EINVAL; + struct pvfs2_kernel_op_s *new_op = NULL; + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_cancel_op_in_progress called on tag %llu\n", + llu(tag)); + + new_op = op_alloc(PVFS2_VFS_OP_CANCEL); + if (!new_op) + return -ENOMEM; + new_op->upcall.req.cancel.op_tag = tag; + + gossip_debug(GOSSIP_UTILS_DEBUG, + "Attempting PVFS2 operation cancellation of tag %llu\n", + llu(new_op->upcall.req.cancel.op_tag)); + + ret = service_operation(new_op, "pvfs2_cancel", PVFS2_OP_CANCELLATION); + + gossip_debug(GOSSIP_UTILS_DEBUG, + "pvfs2_cancel_op_in_progress: got return value of %d\n", + ret); + + op_release(new_op); + return ret; +} + +void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op) +{ + if (op) { + spin_lock(&op->lock); + op->io_completed = 0; + + op->upcall.type = PVFS2_VFS_OP_INVALID; + op->downcall.type = PVFS2_VFS_OP_INVALID; + op->downcall.status = -1; + + op->op_state = OP_VFS_STATE_UNKNOWN; + op->tag = 0; + spin_unlock(&op->lock); + } +} + +void pvfs2_make_bad_inode(struct inode *inode) +{ + if (is_root_handle(inode)) { + /* + * if this occurs, the pvfs2-client-core was killed but we + * can't afford to lose the inode operations and such + * associated with the root handle in any case. 
+ */ + gossip_debug(GOSSIP_UTILS_DEBUG, + "*** NOT making bad root inode %pU\n", + get_khandle_from_ino(inode)); + } else { + gossip_debug(GOSSIP_UTILS_DEBUG, + "*** making bad inode %pU\n", + get_khandle_from_ino(inode)); + make_bad_inode(inode); + } +} + +/* this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigmask */ +void mask_blocked_signals(sigset_t *orig_sigset) +{ + unsigned long sigallow = sigmask(SIGKILL); + unsigned long irqflags = 0; + struct k_sigaction *action = pvfs2_current_sigaction; + + sigallow |= ((action[SIGINT - 1].sa.sa_handler == SIG_DFL) ? + sigmask(SIGINT) : + 0); + sigallow |= ((action[SIGQUIT - 1].sa.sa_handler == SIG_DFL) ? + sigmask(SIGQUIT) : + 0); + + spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags); + *orig_sigset = current->blocked; + siginitsetinv(¤t->blocked, sigallow & ~orig_sigset->sig[0]); + recalc_sigpending(); + spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags); +} + +/* this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigunmask */ +void unmask_blocked_signals(sigset_t *orig_sigset) +{ + unsigned long irqflags = 0; + + spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags); + current->blocked = *orig_sigset; + recalc_sigpending(); + spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags); +} + +__u64 pvfs2_convert_time_field(void *time_ptr) +{ + __u64 pvfs2_time; + struct timespec *tspec = (struct timespec *)time_ptr; + + pvfs2_time = (__u64) ((time_t) tspec->tv_sec); + return pvfs2_time; +} + +/* macro defined in include/pvfs2-types.h */ +DECLARE_ERRNO_MAPPING_AND_FN(); + +int pvfs2_normalize_to_errno(__s32 error_code) +{ + if (error_code > 0) { + gossip_err("pvfs2: error status receieved.\n"); + gossip_err("pvfs2: assuming error code is inverted.\n"); + error_code = -error_code; + } + + /* convert any error codes that are in pvfs2 format */ + if (IS_PVFS_NON_ERRNO_ERROR(-error_code)) { + if (PVFS_NON_ERRNO_ERROR_CODE(-error_code) == PVFS_ECANCEL) { + /* + * cancellation error codes 
generally correspond to + * a timeout from the client's perspective + */ + error_code = -ETIMEDOUT; + } else { + /* assume a default error code */ + gossip_err("pvfs2: warning: got error code without errno equivalent: %d.\n", + error_code); + error_code = -EINVAL; + } + } else if (IS_PVFS_ERROR(-error_code)) { + error_code = -PVFS_ERROR_TO_ERRNO(-error_code); + } + return error_code; +} + +#define NUM_MODES 11 +__s32 PVFS_util_translate_mode(int mode) +{ + int ret = 0; + int i = 0; + static int modes[NUM_MODES] = { + S_IXOTH, S_IWOTH, S_IROTH, + S_IXGRP, S_IWGRP, S_IRGRP, + S_IXUSR, S_IWUSR, S_IRUSR, + S_ISGID, S_ISUID + }; + static int pvfs2_modes[NUM_MODES] = { + PVFS_O_EXECUTE, PVFS_O_WRITE, PVFS_O_READ, + PVFS_G_EXECUTE, PVFS_G_WRITE, PVFS_G_READ, + PVFS_U_EXECUTE, PVFS_U_WRITE, PVFS_U_READ, + PVFS_G_SGID, PVFS_U_SUID + }; + + for (i = 0; i < NUM_MODES; i++) + if (mode & modes[i]) + ret |= pvfs2_modes[i]; + + return ret; +} +#undef NUM_MODES + +/* + * After obtaining a string representation of the client's debug + * keywords and their associated masks, this function is called to build an + * array of these values. + */ +int orangefs_prepare_cdm_array(char *debug_array_string) +{ + int i; + int rc = -EINVAL; + char *cds_head = NULL; + char *cds_delimiter = NULL; + int keyword_len = 0; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__); + + /* + * figure out how many elements the cdm_array needs. 
+ */ + for (i = 0; i < strlen(debug_array_string); i++) + if (debug_array_string[i] == '\n') + cdm_element_count++; + + if (!cdm_element_count) { + pr_info("No elements in client debug array string!\n"); + goto out; + } + + cdm_array = + kzalloc(cdm_element_count * sizeof(struct client_debug_mask), + GFP_KERNEL); + if (!cdm_array) { + pr_info("malloc failed for cdm_array!\n"); + rc = -ENOMEM; + goto out; + } + + cds_head = debug_array_string; + + for (i = 0; i < cdm_element_count; i++) { + cds_delimiter = strchr(cds_head, '\n'); + *cds_delimiter = '\0'; + + keyword_len = strcspn(cds_head, " "); + + cdm_array[i].keyword = kzalloc(keyword_len + 1, GFP_KERNEL); + if (!cdm_array[i].keyword) { + rc = -ENOMEM; + goto out; + } + + sscanf(cds_head, + "%s %llx %llx", + cdm_array[i].keyword, + (unsigned long long *)&(cdm_array[i].mask1), + (unsigned long long *)&(cdm_array[i].mask2)); + + if (!strcmp(cdm_array[i].keyword, PVFS2_VERBOSE)) + client_verbose_index = i; + + if (!strcmp(cdm_array[i].keyword, PVFS2_ALL)) + client_all_index = i; + + cds_head = cds_delimiter + 1; + } + + rc = cdm_element_count; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: rc:%d:\n", __func__, rc); + +out: + + return rc; + +} + +/* + * /sys/kernel/debug/orangefs/debug-help can be catted to + * see all the available kernel and client debug keywords. + * + * When the kernel boots, we have no idea what keywords the + * client supports, nor their associated masks. + * + * We pass through this function once at boot and stamp a + * boilerplate "we don't know" message for the client in the + * debug-help file. We pass through here again when the client + * starts and then we can fill out the debug-help file fully. + * + * The client might be restarted any number of times between + * reboots, we only build the debug-help file the first time. 
+ */ +int orangefs_prepare_debugfs_help_string(int at_boot) +{ + int rc = -EINVAL; + int i; + int byte_count = 0; + char *client_title = "Client Debug Keywords:\n"; + char *kernel_title = "Kernel Debug Keywords:\n"; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__); + + if (at_boot) { + byte_count += strlen(HELP_STRING_UNINITIALIZED); + client_title = HELP_STRING_UNINITIALIZED; + } else { + /* + * fill the client keyword/mask array and remember + * how many elements there were. + */ + cdm_element_count = + orangefs_prepare_cdm_array(client_debug_array_string); + if (cdm_element_count <= 0) + goto out; + + /* Count the bytes destined for debug_help_string. */ + byte_count += strlen(client_title); + + for (i = 0; i < cdm_element_count; i++) { + byte_count += strlen(cdm_array[i].keyword + 2); + if (byte_count >= DEBUG_HELP_STRING_SIZE) { + pr_info("%s: overflow 1!\n", __func__); + goto out; + } + } + + gossip_debug(GOSSIP_UTILS_DEBUG, + "%s: cdm_element_count:%d:\n", + __func__, + cdm_element_count); + } + + byte_count += strlen(kernel_title); + for (i = 0; i < num_kmod_keyword_mask_map; i++) { + byte_count += + strlen(s_kmod_keyword_mask_map[i].keyword + 2); + if (byte_count >= DEBUG_HELP_STRING_SIZE) { + pr_info("%s: overflow 2!\n", __func__); + goto out; + } + } + + /* build debug_help_string. 
*/ + debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL); + if (!debug_help_string) { + rc = -ENOMEM; + goto out; + } + + strcat(debug_help_string, client_title); + + if (!at_boot) { + for (i = 0; i < cdm_element_count; i++) { + strcat(debug_help_string, "\t"); + strcat(debug_help_string, cdm_array[i].keyword); + strcat(debug_help_string, "\n"); + } + } + + strcat(debug_help_string, "\n"); + strcat(debug_help_string, kernel_title); + + for (i = 0; i < num_kmod_keyword_mask_map; i++) { + strcat(debug_help_string, "\t"); + strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword); + strcat(debug_help_string, "\n"); + } + + rc = 0; + +out: + + return rc; + +} + +/* + * kernel = type 0 + * client = type 1 + */ +void debug_mask_to_string(void *mask, int type) +{ + int i; + int len = 0; + char *debug_string; + int element_count = 0; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__); + + if (type) { + debug_string = client_debug_string; + element_count = cdm_element_count; + } else { + debug_string = kernel_debug_string; + element_count = num_kmod_keyword_mask_map; + } + + memset(debug_string, 0, PVFS2_MAX_DEBUG_STRING_LEN); + + /* + * Some keywords, like "all" or "verbose", are amalgams of + * numerous other keywords. Make a special check for those + * before grinding through the whole mask only to find out + * later... + */ + if (check_amalgam_keyword(mask, type)) + goto out; + + /* Build the debug string. 
*/ + for (i = 0; i < element_count; i++) + if (type) + do_c_string(mask, i); + else + do_k_string(mask, i); + + len = strlen(debug_string); + + if ((len) && (type)) + client_debug_string[len - 1] = '\0'; + else if (len) + kernel_debug_string[len - 1] = '\0'; + else if (type) + strcpy(client_debug_string, "none"); + else + strcpy(kernel_debug_string, "none"); + +out: +gossip_debug(GOSSIP_UTILS_DEBUG, "%s: string:%s:\n", __func__, debug_string); + + return; + +} + +void do_k_string(void *k_mask, int index) +{ + __u64 *mask = (__u64 *) k_mask; + + if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword)) + goto out; + + if (*mask & s_kmod_keyword_mask_map[index].mask_val) { + if ((strlen(kernel_debug_string) + + strlen(s_kmod_keyword_mask_map[index].keyword)) + < PVFS2_MAX_DEBUG_STRING_LEN - 1) { + strcat(kernel_debug_string, + s_kmod_keyword_mask_map[index].keyword); + strcat(kernel_debug_string, ","); + } else { + gossip_err("%s: overflow!\n", __func__); + strcpy(kernel_debug_string, PVFS2_ALL); + goto out; + } + } + +out: + + return; +} + +void do_c_string(void *c_mask, int index) +{ + struct client_debug_mask *mask = (struct client_debug_mask *) c_mask; + + if (keyword_is_amalgam(cdm_array[index].keyword)) + goto out; + + if ((mask->mask1 & cdm_array[index].mask1) || + (mask->mask2 & cdm_array[index].mask2)) { + if ((strlen(client_debug_string) + + strlen(cdm_array[index].keyword) + 1) + < PVFS2_MAX_DEBUG_STRING_LEN - 2) { + strcat(client_debug_string, + cdm_array[index].keyword); + strcat(client_debug_string, ","); + } else { + gossip_err("%s: overflow!\n", __func__); + strcpy(client_debug_string, PVFS2_ALL); + goto out; + } + } +out: + return; +} + +int keyword_is_amalgam(char *keyword) +{ + int rc = 0; + + if ((!strcmp(keyword, PVFS2_ALL)) || (!strcmp(keyword, PVFS2_VERBOSE))) + rc = 1; + + return rc; +} + +/* + * kernel = type 0 + * client = type 1 + * + * return 1 if we found an amalgam. 
+ */ +int check_amalgam_keyword(void *mask, int type) +{ + __u64 *k_mask; + struct client_debug_mask *c_mask; + int k_all_index = num_kmod_keyword_mask_map - 1; + int rc = 0; + + if (type) { + c_mask = (struct client_debug_mask *) mask; + + if ((c_mask->mask1 == cdm_array[client_all_index].mask1) && + (c_mask->mask2 == cdm_array[client_all_index].mask2)) { + strcpy(client_debug_string, PVFS2_ALL); + rc = 1; + goto out; + } + + if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) && + (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) { + strcpy(client_debug_string, PVFS2_VERBOSE); + rc = 1; + goto out; + } + + } else { + k_mask = (__u64 *) mask; + + if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) { + strcpy(kernel_debug_string, PVFS2_ALL); + rc = 1; + goto out; + } + } + +out: + + return rc; +} + +/* + * kernel = type 0 + * client = type 1 + */ +void debug_string_to_mask(char *debug_string, void *mask, int type) +{ + char *unchecked_keyword; + int i; + char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL); + int element_count = 0; + struct client_debug_mask *c_mask; + __u64 *k_mask; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__); + + if (type) { + c_mask = (struct client_debug_mask *)mask; + element_count = cdm_element_count; + } else { + k_mask = (__u64 *)mask; + *k_mask = 0; + element_count = num_kmod_keyword_mask_map; + } + + while ((unchecked_keyword = strsep(&strsep_fodder, ","))) + if (strlen(unchecked_keyword)) { + for (i = 0; i < element_count; i++) + if (type) + do_c_mask(i, + unchecked_keyword, + &c_mask); + else + do_k_mask(i, + unchecked_keyword, + &k_mask); + } + + kfree(strsep_fodder); +} + +void do_c_mask(int i, + char *unchecked_keyword, + struct client_debug_mask **sane_mask) +{ + + if (!strcmp(cdm_array[i].keyword, unchecked_keyword)) { + (**sane_mask).mask1 = (**sane_mask).mask1 | cdm_array[i].mask1; + (**sane_mask).mask2 = (**sane_mask).mask2 | cdm_array[i].mask2; + } +} + +void do_k_mask(int i, char 
*unchecked_keyword, __u64 **sane_mask) +{ + + if (!strcmp(s_kmod_keyword_mask_map[i].keyword, unchecked_keyword)) + **sane_mask = (**sane_mask) | + s_kmod_keyword_mask_map[i].mask_val; +} From 1182fca3bc00441d5b2dee2f0548a3b7f978f9e7 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 17 Jul 2015 10:38:15 -0400 Subject: [PATCH 005/174] Orangefs: kernel client part 5 Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 558 ++++++++++++++++++++++++++++++++++++++++ fs/orangefs/symlink.c | 31 +++ fs/orangefs/waitqueue.c | 522 +++++++++++++++++++++++++++++++++++++ fs/orangefs/xattr.c | 532 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 1643 insertions(+) create mode 100644 fs/orangefs/super.c create mode 100644 fs/orangefs/symlink.c create mode 100644 fs/orangefs/waitqueue.c create mode 100644 fs/orangefs/xattr.c diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c new file mode 100644 index 000000000000..a854390fc0ea --- /dev/null +++ b/fs/orangefs/super.c @@ -0,0 +1,558 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +#include + +/* a cache for pvfs2-inode objects (i.e. pvfs2 inode private data) */ +static struct kmem_cache *pvfs2_inode_cache; + +/* list for storing pvfs2 specific superblocks in use */ +LIST_HEAD(pvfs2_superblocks); + +DEFINE_SPINLOCK(pvfs2_superblocks_lock); + +enum { + Opt_intr, + Opt_acl, + Opt_local_lock, + + Opt_err +}; + +static const match_table_t tokens = { + { Opt_acl, "acl" }, + { Opt_intr, "intr" }, + { Opt_local_lock, "local_lock" }, + { Opt_err, NULL } +}; + + +static int parse_mount_options(struct super_block *sb, char *options, + int silent) +{ + struct pvfs2_sb_info_s *pvfs2_sb = PVFS2_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p; + + /* + * Force any potential flags that might be set from the mount + * to zero, ie, initialize to unset. 
+ */ + sb->s_flags &= ~MS_POSIXACL; + pvfs2_sb->flags &= ~PVFS2_OPT_INTR; + pvfs2_sb->flags &= ~PVFS2_OPT_LOCAL_LOCK; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_acl: + sb->s_flags |= MS_POSIXACL; + break; + case Opt_intr: + pvfs2_sb->flags |= PVFS2_OPT_INTR; + break; + case Opt_local_lock: + pvfs2_sb->flags |= PVFS2_OPT_LOCAL_LOCK; + break; + default: + goto fail; + } + } + + return 0; +fail: + if (!silent) + gossip_err("Error: mount option [%s] is not supported.\n", p); + return -EINVAL; +} + +static void pvfs2_inode_cache_ctor(void *req) +{ + struct pvfs2_inode_s *pvfs2_inode = req; + + inode_init_once(&pvfs2_inode->vfs_inode); + init_rwsem(&pvfs2_inode->xattr_sem); + + pvfs2_inode->vfs_inode.i_version = 1; +} + +static struct inode *pvfs2_alloc_inode(struct super_block *sb) +{ + struct pvfs2_inode_s *pvfs2_inode; + + pvfs2_inode = kmem_cache_alloc(pvfs2_inode_cache, + PVFS2_CACHE_ALLOC_FLAGS); + if (pvfs2_inode == NULL) { + gossip_err("Failed to allocate pvfs2_inode\n"); + return NULL; + } + + /* + * We want to clear everything except for rw_semaphore and the + * vfs_inode. 
+ */ + memset(&pvfs2_inode->refn.khandle, 0, 16); + pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL; + pvfs2_inode->last_failed_block_index_read = 0; + memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target)); + pvfs2_inode->pinode_flags = 0; + + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_alloc_inode: allocated %p\n", + &pvfs2_inode->vfs_inode); + return &pvfs2_inode->vfs_inode; +} + +static void pvfs2_destroy_inode(struct inode *inode) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "%s: deallocated %p destroying inode %pU\n", + __func__, pvfs2_inode, get_khandle_from_ino(inode)); + + kmem_cache_free(pvfs2_inode_cache, pvfs2_inode); +} + +/* + * NOTE: information filled in here is typically reflected in the + * output of the system command 'df' +*/ +static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int ret = -ENOMEM; + struct pvfs2_kernel_op_s *new_op = NULL; + int flags = 0; + struct super_block *sb = NULL; + + sb = dentry->d_sb; + + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_statfs: called on sb %p (fs_id is %d)\n", + sb, + (int)(PVFS2_SB(sb)->fs_id)); + + new_op = op_alloc(PVFS2_VFS_OP_STATFS); + if (!new_op) + return ret; + new_op->upcall.req.statfs.fs_id = PVFS2_SB(sb)->fs_id; + + if (PVFS2_SB(sb)->flags & PVFS2_OPT_INTR) + flags = PVFS2_OP_INTERRUPTIBLE; + + ret = service_operation(new_op, "pvfs2_statfs", flags); + + if (new_op->downcall.status < 0) + goto out_op_release; + + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_statfs: got %ld blocks available | " + "%ld blocks total | %ld block size\n", + (long)new_op->downcall.resp.statfs.blocks_avail, + (long)new_op->downcall.resp.statfs.blocks_total, + (long)new_op->downcall.resp.statfs.block_size); + + buf->f_type = sb->s_magic; + memcpy(&buf->f_fsid, &PVFS2_SB(sb)->fs_id, sizeof(buf->f_fsid)); + buf->f_bsize = new_op->downcall.resp.statfs.block_size; + buf->f_namelen = PVFS2_NAME_LEN; + + buf->f_blocks = (sector_t) 
new_op->downcall.resp.statfs.blocks_total; + buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail; + buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail; + buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total; + buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail; + buf->f_frsize = sb->s_blocksize; + +out_op_release: + op_release(new_op); + gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_statfs: returning %d\n", ret); + return ret; +} + +/* + * Remount as initiated by VFS layer. We just need to reparse the mount + * options, no need to signal pvfs2-client-core about it. + */ +static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data) +{ + gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n"); + return parse_mount_options(sb, data, 1); +} + +/* + * Remount as initiated by pvfs2-client-core on restart. This is used to + * repopulate mount information left from previous pvfs2-client-core. + * + * the idea here is that given a valid superblock, we're + * re-initializing the user space client with the initial mount + * information specified when the super block was first initialized. + * this is very different than the first initialization/creation of a + * superblock. we use the special service_priority_operation to make + * sure that the mount gets ahead of any other pending operation that + * is waiting for servicing. this means that the pvfs2-client won't + * fail to start several times for all other pending operations before + * the client regains all of the mount information from us. + * NOTE: this function assumes that the request_mutex is already acquired! 
+ */ +int pvfs2_remount(struct super_block *sb) +{ + struct pvfs2_kernel_op_s *new_op; + int ret = -EINVAL; + + gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount: called\n"); + + new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT); + if (!new_op) + return -ENOMEM; + strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server, + PVFS2_SB(sb)->devname, + PVFS_MAX_SERVER_ADDR_LEN); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "Attempting PVFS2 Remount via host %s\n", + new_op->upcall.req.fs_mount.pvfs2_config_server); + + /* + * we assume that the calling function has already acquire the + * request_mutex to prevent other operations from bypassing + * this one + */ + ret = service_operation(new_op, "pvfs2_remount", + PVFS2_OP_PRIORITY | PVFS2_OP_NO_SEMAPHORE); + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_remount: mount got return value of %d\n", + ret); + if (ret == 0) { + /* + * store the id assigned to this sb -- it's just a + * short-lived mapping that the system interface uses + * to map this superblock to a particular mount entry + */ + PVFS2_SB(sb)->id = new_op->downcall.resp.fs_mount.id; + PVFS2_SB(sb)->mount_pending = 0; + } + + op_release(new_op); + return ret; +} + +int fsid_key_table_initialize(void) +{ + return 0; +} + +void fsid_key_table_finalize(void) +{ +} + +/* Called whenever the VFS dirties the inode in response to atime updates */ +static void pvfs2_dirty_inode(struct inode *inode, int flags) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_dirty_inode: %pU\n", + get_khandle_from_ino(inode)); + SetAtimeFlag(pvfs2_inode); +} + +struct super_operations pvfs2_s_ops = { + .alloc_inode = pvfs2_alloc_inode, + .destroy_inode = pvfs2_destroy_inode, + .dirty_inode = pvfs2_dirty_inode, + .drop_inode = generic_delete_inode, + .statfs = pvfs2_statfs, + .remount_fs = pvfs2_remount_fs, + .show_options = generic_show_options, +}; + +struct dentry *pvfs2_fh_to_dentry(struct super_block *sb, + struct fid *fid, + int fh_len, + int 
fh_type) +{ + struct pvfs2_object_kref refn; + + if (fh_len < 5 || fh_type > 2) + return NULL; + + PVFS_khandle_from(&(refn.khandle), fid->raw, 16); + refn.fs_id = (u32) fid->raw[4]; + gossip_debug(GOSSIP_SUPER_DEBUG, + "fh_to_dentry: handle %pU, fs_id %d\n", + &refn.khandle, + refn.fs_id); + + return d_obtain_alias(pvfs2_iget(sb, &refn)); +} + +int pvfs2_encode_fh(struct inode *inode, + __u32 *fh, + int *max_len, + struct inode *parent) +{ + int len = parent ? 10 : 5; + int type = 1; + struct pvfs2_object_kref refn; + + if (*max_len < len) { + gossip_lerr("fh buffer is too small for encoding\n"); + *max_len = len; + type = 255; + goto out; + } + + refn = PVFS2_I(inode)->refn; + PVFS_khandle_to(&refn.khandle, fh, 16); + fh[4] = refn.fs_id; + + gossip_debug(GOSSIP_SUPER_DEBUG, + "Encoding fh: handle %pU, fsid %u\n", + &refn.khandle, + refn.fs_id); + + + if (parent) { + refn = PVFS2_I(parent)->refn; + PVFS_khandle_to(&refn.khandle, (char *) fh + 20, 16); + fh[9] = refn.fs_id; + + type = 2; + gossip_debug(GOSSIP_SUPER_DEBUG, + "Encoding parent: handle %pU, fsid %u\n", + &refn.khandle, + refn.fs_id); + } + *max_len = len; + +out: + return type; +} + +static struct export_operations pvfs2_export_ops = { + .encode_fh = pvfs2_encode_fh, + .fh_to_dentry = pvfs2_fh_to_dentry, +}; + +int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) +{ + int ret = -EINVAL; + struct inode *root = NULL; + struct dentry *root_dentry = NULL; + struct pvfs2_mount_sb_info_s *mount_sb_info = + (struct pvfs2_mount_sb_info_s *) data; + struct pvfs2_object_kref root_object; + + /* alloc and init our private pvfs2 sb info */ + sb->s_fs_info = + kmalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS); + if (!PVFS2_SB(sb)) + return -ENOMEM; + memset(sb->s_fs_info, 0, sizeof(struct pvfs2_sb_info_s)); + PVFS2_SB(sb)->sb = sb; + + PVFS2_SB(sb)->root_khandle = mount_sb_info->root_khandle; + PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id; + PVFS2_SB(sb)->id = mount_sb_info->id; + + if 
(mount_sb_info->data) { + ret = parse_mount_options(sb, mount_sb_info->data, + silent); + if (ret) + return ret; + } + + /* Hang the xattr handlers off the superblock */ + sb->s_xattr = pvfs2_xattr_handlers; + sb->s_magic = PVFS2_SUPER_MAGIC; + sb->s_op = &pvfs2_s_ops; + sb->s_d_op = &pvfs2_dentry_operations; + + sb->s_blocksize = pvfs_bufmap_size_query(); + sb->s_blocksize_bits = pvfs_bufmap_shift_query(); + sb->s_maxbytes = MAX_LFS_FILESIZE; + + root_object.khandle = PVFS2_SB(sb)->root_khandle; + root_object.fs_id = PVFS2_SB(sb)->fs_id; + gossip_debug(GOSSIP_SUPER_DEBUG, + "get inode %pU, fsid %d\n", + &root_object.khandle, + root_object.fs_id); + + root = pvfs2_iget(sb, &root_object); + if (IS_ERR(root)) + return PTR_ERR(root); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "Allocated root inode [%p] with mode %x\n", + root, + root->i_mode); + + /* allocates and places root dentry in dcache */ + root_dentry = d_make_root(root); + if (!root_dentry) { + iput(root); + return -ENOMEM; + } + + sb->s_export_op = &pvfs2_export_ops; + sb->s_root = root_dentry; + return 0; +} + +struct dentry *pvfs2_mount(struct file_system_type *fst, + int flags, + const char *devname, + void *data) +{ + int ret = -EINVAL; + struct super_block *sb = ERR_PTR(-EINVAL); + struct pvfs2_kernel_op_s *new_op; + struct pvfs2_mount_sb_info_s mount_sb_info; + struct dentry *mnt_sb_d = ERR_PTR(-EINVAL); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_mount: called with devname %s\n", + devname); + + if (!devname) { + gossip_err("ERROR: device name not specified.\n"); + return ERR_PTR(-EINVAL); + } + + new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT); + if (!new_op) + return ERR_PTR(-ENOMEM); + + strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server, + devname, + PVFS_MAX_SERVER_ADDR_LEN); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "Attempting PVFS2 Mount via host %s\n", + new_op->upcall.req.fs_mount.pvfs2_config_server); + + ret = service_operation(new_op, "pvfs2_mount", 0); + gossip_debug(GOSSIP_SUPER_DEBUG, + 
"pvfs2_mount: mount got return value of %d\n", ret); + if (ret) + goto free_op; + + if (new_op->downcall.resp.fs_mount.fs_id == PVFS_FS_ID_NULL) { + gossip_err("ERROR: Retrieved null fs_id\n"); + ret = -EINVAL; + goto free_op; + } + + /* fill in temporary structure passed to fill_sb method */ + mount_sb_info.data = data; + mount_sb_info.root_khandle = + new_op->downcall.resp.fs_mount.root_khandle; + mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id; + mount_sb_info.id = new_op->downcall.resp.fs_mount.id; + + /* + * the mount_sb_info structure looks odd, but it's used because + * the private sb info isn't allocated until we call + * pvfs2_fill_sb, yet we have the info we need to fill it with + * here. so we store it temporarily and pass all of the info + * to fill_sb where it's properly copied out + */ + mnt_sb_d = mount_nodev(fst, + flags, + (void *)&mount_sb_info, + pvfs2_fill_sb); + if (IS_ERR(mnt_sb_d)) { + sb = ERR_CAST(mnt_sb_d); + goto free_op; + } + + sb = mnt_sb_d->d_sb; + + /* + * on successful mount, store the devname and data + * used + */ + strncpy(PVFS2_SB(sb)->devname, + devname, + PVFS_MAX_SERVER_ADDR_LEN); + + /* mount_pending must be cleared */ + PVFS2_SB(sb)->mount_pending = 0; + + /* + * finally, add this sb to our list of known pvfs2 + * sb's + */ + add_pvfs2_sb(sb); + op_release(new_op); + return mnt_sb_d; + +free_op: + gossip_err("pvfs2_mount: mount request failed with %d\n", ret); + if (ret == -EINVAL) { + gossip_err("Ensure that all pvfs2-servers have the same FS configuration files\n"); + gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n"); + } + + op_release(new_op); + + gossip_debug(GOSSIP_SUPER_DEBUG, + "pvfs2_mount: returning dentry %p\n", + mnt_sb_d); + return mnt_sb_d; +} + +void pvfs2_kill_sb(struct super_block *sb) +{ + gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_kill_sb: called\n"); + + /* + * issue the unmount to userspace to tell it to remove the + * dynamic mount info it has 
for this superblock + */ + pvfs2_unmount_sb(sb); + + /* remove the sb from our list of pvfs2 specific sb's */ + remove_pvfs2_sb(sb); + + /* provided sb cleanup */ + kill_anon_super(sb); + + /* free the pvfs2 superblock private data */ + kfree(PVFS2_SB(sb)); +} + +int pvfs2_inode_cache_initialize(void) +{ + pvfs2_inode_cache = kmem_cache_create("pvfs2_inode_cache", + sizeof(struct pvfs2_inode_s), + 0, + PVFS2_CACHE_CREATE_FLAGS, + pvfs2_inode_cache_ctor); + + if (!pvfs2_inode_cache) { + gossip_err("Cannot create pvfs2_inode_cache\n"); + return -ENOMEM; + } + return 0; +} + +int pvfs2_inode_cache_finalize(void) +{ + kmem_cache_destroy(pvfs2_inode_cache); + return 0; +} diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c new file mode 100644 index 000000000000..2adfceff7730 --- /dev/null +++ b/fs/orangefs/symlink.c @@ -0,0 +1,31 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +static const char *pvfs2_follow_link(struct dentry *dentry, void **cookie) +{ + char *target = PVFS2_I(dentry->d_inode)->link_target; + + gossip_debug(GOSSIP_INODE_DEBUG, + "%s: called on %s (target is %p)\n", + __func__, (char *)dentry->d_name.name, target); + + *cookie = target; + + return target; +} + +struct inode_operations pvfs2_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = pvfs2_follow_link, + .setattr = pvfs2_setattr, + .getattr = pvfs2_getattr, + .listxattr = pvfs2_listxattr, + .setxattr = generic_setxattr, +}; diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c new file mode 100644 index 000000000000..9b32286a7dc4 --- /dev/null +++ b/fs/orangefs/waitqueue.c @@ -0,0 +1,522 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * (C) 2011 Omnibond Systems + * + * Changes by Acxiom Corporation to implement generic service_operation() + * function, Copyright Acxiom 
Corporation, 2005. + * + * See COPYING in top-level directory. + */ + +/* + * In-kernel waitqueue operations. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +/* + * What we do in this function is to walk the list of operations that are + * present in the request queue and mark them as purged. + * NOTE: This is called from the device close after client-core has + * guaranteed that no new operations could appear on the list since the + * client-core is anyway going to exit. + */ +void purge_waiting_ops(void) +{ + struct pvfs2_kernel_op_s *op; + + spin_lock(&pvfs2_request_list_lock); + list_for_each_entry(op, &pvfs2_request_list, list) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "pvfs2-client-core: purging op tag %llu %s\n", + llu(op->tag), + get_opname_string(op)); + spin_lock(&op->lock); + set_op_state_purged(op); + spin_unlock(&op->lock); + wake_up_interruptible(&op->waitq); + } + spin_unlock(&pvfs2_request_list_lock); +} + +/* + * submits a PVFS2 operation and waits for it to complete + * + * Note op->downcall.status will contain the status of the operation (in + * errno format), whether provided by pvfs2-client or a result of failure to + * service the operation. If the caller wishes to distinguish, then + * op->state can be checked to see if it was serviced or not. 
+ * + * Returns contents of op->downcall.status for convenience + */ +int service_operation(struct pvfs2_kernel_op_s *op, + const char *op_name, + int flags) +{ + /* flags to modify behavior */ + sigset_t orig_sigset; + int ret = 0; + + /* irqflags and wait_entry are only used IF the client-core aborts */ + unsigned long irqflags; + + DECLARE_WAITQUEUE(wait_entry, current); + + op->upcall.tgid = current->tgid; + op->upcall.pid = current->pid; + +retry_servicing: + op->downcall.status = 0; + gossip_debug(GOSSIP_WAIT_DEBUG, + "pvfs2: service_operation: %s %p\n", + op_name, + op); + gossip_debug(GOSSIP_WAIT_DEBUG, + "pvfs2: operation posted by process: %s, pid: %i\n", + current->comm, + current->pid); + + /* mask out signals if this operation is not to be interrupted */ + if (!(flags & PVFS2_OP_INTERRUPTIBLE)) + mask_blocked_signals(&orig_sigset); + + if (!(flags & PVFS2_OP_NO_SEMAPHORE)) { + ret = mutex_lock_interruptible(&request_mutex); + /* + * check to see if we were interrupted while waiting for + * semaphore + */ + if (ret < 0) { + if (!(flags & PVFS2_OP_INTERRUPTIBLE)) + unmask_blocked_signals(&orig_sigset); + op->downcall.status = ret; + gossip_debug(GOSSIP_WAIT_DEBUG, + "pvfs2: service_operation interrupted.\n"); + return ret; + } + } + + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:About to call is_daemon_in_service().\n", + __func__); + + if (is_daemon_in_service() < 0) { + /* + * By incrementing the per-operation attempt counter, we + * directly go into the timeout logic while waiting for + * the matching downcall to be read + */ + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:client core is NOT in service(%d).\n", + __func__, + is_daemon_in_service()); + op->attempts++; + } + + /* queue up the operation */ + if (flags & PVFS2_OP_PRIORITY) { + add_priority_op_to_request_list(op); + } else { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:About to call add_op_to_request_list().\n", + __func__); + add_op_to_request_list(op); + } + + if (!(flags & PVFS2_OP_NO_SEMAPHORE)) + 
mutex_unlock(&request_mutex); + + /* + * If we are asked to service an asynchronous operation from + * VFS perspective, we are done. + */ + if (flags & PVFS2_OP_ASYNC) + return 0; + + if (flags & PVFS2_OP_CANCELLATION) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:" + "About to call wait_for_cancellation_downcall.\n", + __func__); + ret = wait_for_cancellation_downcall(op); + } else { + ret = wait_for_matching_downcall(op); + } + + if (ret < 0) { + /* failed to get matching downcall */ + if (ret == -ETIMEDOUT) { + gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n", + op_name); + } + op->downcall.status = ret; + } else { + /* got matching downcall; make sure status is in errno format */ + op->downcall.status = + pvfs2_normalize_to_errno(op->downcall.status); + ret = op->downcall.status; + } + + if (!(flags & PVFS2_OP_INTERRUPTIBLE)) + unmask_blocked_signals(&orig_sigset); + + BUG_ON(ret != op->downcall.status); + /* retry if operation has not been serviced and if requested */ + if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "pvfs2: tag %llu (%s)" + " -- operation to be retried (%d attempt)\n", + llu(op->tag), + op_name, + op->attempts + 1); + + if (!op->uses_shared_memory) + /* + * this operation doesn't use the shared memory + * system + */ + goto retry_servicing; + + /* op uses shared memory */ + if (get_bufmap_init() == 0) { + /* + * This operation uses the shared memory system AND + * the system is not yet ready. This situation occurs + * when the client-core is restarted AND there were + * operations waiting to be processed or were already + * in process. 
+ */ + gossip_debug(GOSSIP_WAIT_DEBUG, + "uses_shared_memory is true.\n"); + gossip_debug(GOSSIP_WAIT_DEBUG, + "Client core in-service status(%d).\n", + is_daemon_in_service()); + gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n", + get_bufmap_init()); + gossip_debug(GOSSIP_WAIT_DEBUG, + "operation's status is 0x%0x.\n", + op->op_state); + + /* + * let process sleep for a few seconds so shared + * memory system can be initialized. + */ + spin_lock_irqsave(&op->lock, irqflags); + add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry); + spin_unlock_irqrestore(&op->lock, irqflags); + + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Wait for pvfs_bufmap_initialize() to wake me up + * within the allotted time. + */ + ret = schedule_timeout(MSECS_TO_JIFFIES + (1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS)); + + gossip_debug(GOSSIP_WAIT_DEBUG, + "Value returned from schedule_timeout:" + "%d.\n", + ret); + gossip_debug(GOSSIP_WAIT_DEBUG, + "Is shared memory available? (%d).\n", + get_bufmap_init()); + + spin_lock_irqsave(&op->lock, irqflags); + remove_wait_queue(&pvfs2_bufmap_init_waitq, + &wait_entry); + spin_unlock_irqrestore(&op->lock, irqflags); + + if (get_bufmap_init() == 0) { + gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n", + __func__, + PVFS2_BUFMAP_WAIT_TIMEOUT_SECS, + get_opname_string(op)); + return -EIO; + } + + /* + * Return to the calling function and re-populate a + * shared memory buffer. + */ + return -EAGAIN; + } + } + + gossip_debug(GOSSIP_WAIT_DEBUG, + "pvfs2: service_operation %s returning: %d for %p.\n", + op_name, + ret, + op); + return ret; +} + +void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op) +{ + /* + * handle interrupted cases depending on what state we were in when + * the interruption is detected. there is a coarse grained lock + * across the operation. 
+ * + * NOTE: be sure not to reverse lock ordering by locking an op lock + * while holding the request_list lock. Here, we first lock the op + * and then lock the appropriate list. + */ + if (!op) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s: op is null, ignoring\n", + __func__); + return; + } + + /* + * one more sanity check, make sure it's in one of the possible states + * or don't try to cancel it + */ + if (!(op_state_waiting(op) || + op_state_in_progress(op) || + op_state_serviced(op) || + op_state_purged(op))) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s: op %p not in a valid state (%0x), " + "ignoring\n", + __func__, + op, + op->op_state); + return; + } + + spin_lock(&op->lock); + + if (op_state_waiting(op)) { + /* + * upcall hasn't been read; remove op from upcall request + * list. + */ + spin_unlock(&op->lock); + remove_op_from_request_list(op); + gossip_debug(GOSSIP_WAIT_DEBUG, + "Interrupted: Removed op %p from request_list\n", + op); + } else if (op_state_in_progress(op)) { + /* op must be removed from the in progress htable */ + spin_unlock(&op->lock); + spin_lock(&htable_ops_in_progress_lock); + list_del(&op->list); + spin_unlock(&htable_ops_in_progress_lock); + gossip_debug(GOSSIP_WAIT_DEBUG, + "Interrupted: Removed op %p" + " from htable_ops_in_progress\n", + op); + } else if (!op_state_serviced(op)) { + spin_unlock(&op->lock); + gossip_err("interrupted operation is in a weird state 0x%x\n", + op->op_state); + } +} + +/* + * sleeps on waitqueue waiting for matching downcall. + * if client-core finishes servicing, then we are good to go. + * else if client-core exits, we get woken up here, and retry with a timeout + * + * Post when this call returns to the caller, the specified op will no + * longer be on any list or htable. + * + * Returns 0 on success and -errno on failure + * Errors are: + * EAGAIN in case we want the caller to requeue and try again.. 
+ * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this + * operation since client-core seems to be exiting too often + * or if we were interrupted. + */ +int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) +{ + int ret = -EINVAL; + DECLARE_WAITQUEUE(wait_entry, current); + + spin_lock(&op->lock); + add_wait_queue(&op->waitq, &wait_entry); + spin_unlock(&op->lock); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock(&op->lock); + if (op_state_serviced(op)) { + spin_unlock(&op->lock); + ret = 0; + break; + } + spin_unlock(&op->lock); + + if (!signal_pending(current)) { + /* + * if this was our first attempt and client-core + * has not purged our operation, we are happy to + * simply wait + */ + spin_lock(&op->lock); + if (op->attempts == 0 && !op_state_purged(op)) { + spin_unlock(&op->lock); + schedule(); + } else { + spin_unlock(&op->lock); + /* + * subsequent attempts, we retry exactly once + * with timeouts + */ + if (!schedule_timeout(MSECS_TO_JIFFIES + (1000 * op_timeout_secs))) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation timed out (tag" + " %llu, %p, att %d)\n", + __func__, + llu(op->tag), + op, + op->attempts); + ret = -ETIMEDOUT; + pvfs2_clean_up_interrupted_operation + (op); + break; + } + } + spin_lock(&op->lock); + op->attempts++; + /* + * if the operation was purged in the meantime, it + * is better to requeue it afresh but ensure that + * we have not been purged repeatedly. This could + * happen if client-core crashes when an op + * is being serviced, so we requeue the op, client + * core crashes again so we requeue the op, client + * core starts, and so on... + */ + if (op_state_purged(op)) { + ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ? 
+ -EAGAIN : + -EIO; + spin_unlock(&op->lock); + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation purged (tag " + "%llu, %p, att %d)\n", + __func__, + llu(op->tag), + op, + op->attempts); + pvfs2_clean_up_interrupted_operation(op); + break; + } + spin_unlock(&op->lock); + continue; + } + + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation interrupted by a signal (tag " + "%llu, op %p)\n", + __func__, + llu(op->tag), + op); + pvfs2_clean_up_interrupted_operation(op); + ret = -EINTR; + break; + } + + set_current_state(TASK_RUNNING); + + spin_lock(&op->lock); + remove_wait_queue(&op->waitq, &wait_entry); + spin_unlock(&op->lock); + + return ret; +} + +/* + * similar to wait_for_matching_downcall(), but used in the special case + * of I/O cancellations. + * + * Note we need a special wait function because if this is called we already + * know that a signal is pending in current and need to service the + * cancellation upcall anyway. the only way to exit this is to either + * timeout or have the cancellation be serviced properly. 
+ */ +int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op) +{ + int ret = -EINVAL; + DECLARE_WAITQUEUE(wait_entry, current); + + spin_lock(&op->lock); + add_wait_queue(&op->waitq, &wait_entry); + spin_unlock(&op->lock); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock(&op->lock); + if (op_state_serviced(op)) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:op-state is SERVICED.\n", + __func__); + spin_unlock(&op->lock); + ret = 0; + break; + } + spin_unlock(&op->lock); + + if (signal_pending(current)) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:operation interrupted by a signal (tag" + " %llu, op %p)\n", + __func__, + llu(op->tag), + op); + pvfs2_clean_up_interrupted_operation(op); + ret = -EINTR; + break; + } + + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:About to call schedule_timeout.\n", + __func__); + ret = + schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs)); + + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:Value returned from schedule_timeout(%d).\n", + __func__, + ret); + if (!ret) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:*** operation timed out: %p\n", + __func__, + op); + pvfs2_clean_up_interrupted_operation(op); + ret = -ETIMEDOUT; + break; + } + + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n", + __func__); + ret = -ETIMEDOUT; + break; + } + + set_current_state(TASK_RUNNING); + + spin_lock(&op->lock); + remove_wait_queue(&op->waitq, &wait_entry); + spin_unlock(&op->lock); + + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s:returning ret(%d)\n", + __func__, + ret); + + return ret; +} diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c new file mode 100644 index 000000000000..2766090f5ca4 --- /dev/null +++ b/fs/orangefs/xattr.c @@ -0,0 +1,532 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * Linux VFS extended attribute operations. 
+ */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" +#include +#include + + +#define SYSTEM_PVFS2_KEY "system.pvfs2." +#define SYSTEM_PVFS2_KEY_LEN 13 + +/* + * this function returns + * 0 if the key corresponding to name is not meant to be printed as part + * of a listxattr. + * 1 if the key corresponding to name is meant to be returned as part of + * a listxattr. + * The ones that start SYSTEM_PVFS2_KEY are the ones to avoid printing. + */ +static int is_reserved_key(const char *key, size_t size) +{ + + if (size < SYSTEM_PVFS2_KEY_LEN) + return 1; + + return strncmp(key, SYSTEM_PVFS2_KEY, SYSTEM_PVFS2_KEY_LEN) ? 1 : 0; +} + +static inline int convert_to_internal_xattr_flags(int setxattr_flags) +{ + int internal_flag = 0; + + if (setxattr_flags & XATTR_REPLACE) { + /* Attribute must exist! */ + internal_flag = PVFS_XATTR_REPLACE; + } else if (setxattr_flags & XATTR_CREATE) { + /* Attribute must not exist */ + internal_flag = PVFS_XATTR_CREATE; + } + return internal_flag; +} + + +/* + * Tries to get a specified key's attributes of a given + * file into a user-specified buffer. Note that the getxattr + * interface allows for the users to probe the size of an + * extended attribute by passing in a value of 0 to size. + * Thus our return value is always the size of the attribute + * unless the key does not exist for the file and/or if + * there were errors in fetching the attribute value. 
+ */
+ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix,
+		const char *name, void *buffer, size_t size)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op_s *new_op = NULL;
+	ssize_t ret = -ENOMEM;
+	ssize_t length = 0;
+	size_t key_len;
+	int fsuid;
+	int fsgid;
+
+	if (name == NULL || (size > 0 && buffer == NULL)) {
+		gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Treat a missing prefix as the empty prefix, the same way
+	 * pvfs2_inode_removexattr() below does, rather than handing NULL
+	 * to strlen() and to the "%s%s" key format.
+	 */
+	if (prefix == NULL)
+		prefix = "";
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "%s: prefix %s name %s, buffer_size %zu\n",
+		     __func__, prefix, name, size);
+
+	/*
+	 * size is unsigned and cannot be negative, so only the key needs
+	 * validating: prefix + name plus the NUL terminator must fit in the
+	 * PVFS_MAX_XATTR_NAMELEN bytes snprintf() may write below.
+	 */
+	key_len = strlen(name) + strlen(prefix);
+	if (key_len >= PVFS_MAX_XATTR_NAMELEN) {
+		gossip_err("Invalid key length (%d)\n", (int)key_len);
+		return -EINVAL;
+	}
+
+	fsuid = from_kuid(current_user_ns(), current_fsuid());
+	fsgid = from_kgid(current_user_ns(), current_fsgid());
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "getxattr on inode %pU, name %s "
+		     "(uid %o, gid %o)\n",
+		     get_khandle_from_ino(inode),
+		     name,
+		     fsuid,
+		     fsgid);
+
+	down_read(&pvfs2_inode->xattr_sem);
+
+	/* ret is still -ENOMEM here, which is what a failed op_alloc returns. */
+	new_op = op_alloc(PVFS2_VFS_OP_GETXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	new_op->upcall.req.getxattr.refn = pvfs2_inode->refn;
+	ret = snprintf((char *)new_op->upcall.req.getxattr.key,
+		       PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
+
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated textual
+	 * strings, I am going to explicitly pass the length just in case
+	 * we change this later on...
+	 */
+	new_op->upcall.req.getxattr.key_sz = ret + 1;
+
+	ret = service_operation(new_op, "pvfs2_inode_getxattr",
+				get_interruptible_flag(inode));
+	if (ret != 0) {
+		/* A missing key is reported to the caller as -ENODATA. */
+		if (ret == -ENOENT) {
+			ret = -ENODATA;
+			gossip_debug(GOSSIP_XATTR_DEBUG,
+				     "pvfs2_inode_getxattr: inode %pU key %s"
+				     " does not exist!\n",
+				     get_khandle_from_ino(inode),
+				     (char *)new_op->upcall.req.getxattr.key);
+		}
+		goto out_release_op;
+	}
+
+	/*
+	 * Length returned includes null terminator.
+	 */
+	length = new_op->downcall.resp.getxattr.val_sz;
+
+	/*
+	 * Just return the length of the queried attribute.
+	 */
+	if (size == 0) {
+		ret = length;
+		goto out_release_op;
+	}
+
+	/*
+	 * Check to see if the value length is > provided buffer size.
+	 */
+	if (length > size) {
+		ret = -ERANGE;
+		goto out_release_op;
+	}
+
+	memset(buffer, 0, size);
+	memcpy(buffer, new_op->downcall.resp.getxattr.val, length);
+	/* Report the value length that was copied (ret is 0 at this point). */
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_getxattr: inode %pU "
+		     "key %s key_sz %d, val_len %d\n",
+		     get_khandle_from_ino(inode),
+		     (char *)new_op->upcall.req.getxattr.key,
+		     (int)new_op->upcall.req.getxattr.key_sz,
+		     (int)length);
+
+	ret = length;
+
+out_release_op:
+	op_release(new_op);
+out_unlock:
+	up_read(&pvfs2_inode->xattr_sem);
+	return ret;
+}
+
+static int pvfs2_inode_removexattr(struct inode *inode,
+			    const char *prefix,
+			    const char *name,
+			    int flags)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op_s *new_op = NULL;
+	int ret = -ENOMEM;
+
+	down_write(&pvfs2_inode->xattr_sem);
+	new_op = op_alloc(PVFS2_VFS_OP_REMOVEXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	new_op->upcall.req.removexattr.refn = pvfs2_inode->refn;
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated
+	 * textual strings, I am going to explicitly pass the
+	 * length just in case we change this later on...
+	 */
+	ret = snprintf((char *)new_op->upcall.req.removexattr.key,
+		       PVFS_MAX_XATTR_NAMELEN,
+		       "%s%s",
+		       (prefix ? prefix : ""),
+		       name);
+	new_op->upcall.req.removexattr.key_sz = ret + 1;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_removexattr: key %s, key_sz %d\n",
+		     (char *)new_op->upcall.req.removexattr.key,
+		     (int)new_op->upcall.req.removexattr.key_sz);
+
+	ret = service_operation(new_op,
+				"pvfs2_inode_removexattr",
+				get_interruptible_flag(inode));
+	if (ret == -ENOENT) {
+		/*
+		 * Request to replace a non-existent attribute is an error.
+ */ + if (flags & XATTR_REPLACE) + ret = -ENODATA; + else + ret = 0; + } + + gossip_debug(GOSSIP_XATTR_DEBUG, + "pvfs2_inode_removexattr: returning %d\n", ret); + + op_release(new_op); +out_unlock: + up_write(&pvfs2_inode->xattr_sem); + return ret; +} + +/* + * Tries to set an attribute for a given key on a file. + * + * Returns a -ve number on error and 0 on success. Key is text, but value + * can be binary! + */ +int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, + const char *name, const void *value, size_t size, int flags) +{ + struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct pvfs2_kernel_op_s *new_op; + int internal_flag = 0; + int ret = -ENOMEM; + + gossip_debug(GOSSIP_XATTR_DEBUG, + "%s: prefix %s, name %s, buffer_size %zd\n", + __func__, prefix, name, size); + + if (size < 0 || + size >= PVFS_MAX_XATTR_VALUELEN || + flags < 0) { + gossip_err("pvfs2_inode_setxattr: bogus values of size(%d), flags(%d)\n", + (int)size, + flags); + return -EINVAL; + } + + if (name == NULL || + (size > 0 && value == NULL)) { + gossip_err("pvfs2_inode_setxattr: bogus NULL pointers!\n"); + return -EINVAL; + } + + internal_flag = convert_to_internal_xattr_flags(flags); + + if (prefix) { + if (strlen(name) + strlen(prefix) >= PVFS_MAX_XATTR_NAMELEN) { + gossip_err + ("pvfs2_inode_setxattr: bogus key size (%d)\n", + (int)(strlen(name) + strlen(prefix))); + return -EINVAL; + } + } else { + if (strlen(name) >= PVFS_MAX_XATTR_NAMELEN) { + gossip_err + ("pvfs2_inode_setxattr: bogus key size (%d)\n", + (int)(strlen(name))); + return -EINVAL; + } + } + + /* This is equivalent to a removexattr */ + if (size == 0 && value == NULL) { + gossip_debug(GOSSIP_XATTR_DEBUG, + "removing xattr (%s%s)\n", + prefix, + name); + return pvfs2_inode_removexattr(inode, prefix, name, flags); + } + + gossip_debug(GOSSIP_XATTR_DEBUG, + "setxattr on inode %pU, name %s\n", + get_khandle_from_ino(inode), + name); + + down_write(&pvfs2_inode->xattr_sem); + new_op = 
op_alloc(PVFS2_VFS_OP_SETXATTR); + if (!new_op) + goto out_unlock; + + + new_op->upcall.req.setxattr.refn = pvfs2_inode->refn; + new_op->upcall.req.setxattr.flags = internal_flag; + /* + * NOTE: Although keys are meant to be NULL terminated textual + * strings, I am going to explicitly pass the length just in + * case we change this later on... + */ + ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key, + PVFS_MAX_XATTR_NAMELEN, + "%s%s", + prefix, name); + new_op->upcall.req.setxattr.keyval.key_sz = ret + 1; + memcpy(new_op->upcall.req.setxattr.keyval.val, value, size); + new_op->upcall.req.setxattr.keyval.val_sz = size; + + gossip_debug(GOSSIP_XATTR_DEBUG, + "pvfs2_inode_setxattr: key %s, key_sz %d " + " value size %zd\n", + (char *)new_op->upcall.req.setxattr.keyval.key, + (int)new_op->upcall.req.setxattr.keyval.key_sz, + size); + + ret = service_operation(new_op, + "pvfs2_inode_setxattr", + get_interruptible_flag(inode)); + + gossip_debug(GOSSIP_XATTR_DEBUG, + "pvfs2_inode_setxattr: returning %d\n", + ret); + + /* when request is serviced properly, free req op struct */ + op_release(new_op); +out_unlock: + up_write(&pvfs2_inode->xattr_sem); + return ret; +} + +/* + * Tries to get a specified object's keys into a user-specified buffer of a + * given size. Note that like the previous instances of xattr routines, this + * also allows you to pass in a NULL pointer and 0 size to probe the size for + * subsequent memory allocations. Thus our return value is always the size of + * all the keys unless there were errors in fetching the keys! 
+ */
+ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op_s *new_op;
+	__u64 token = PVFS_ITERATE_START;
+	const char *cur_key;
+	ssize_t ret = -ENOMEM;
+	ssize_t total = 0;
+	ssize_t length = 0;
+	int count_keys = 0;
+	int key_size;
+	int i = 0;
+
+	/* size is unsigned, so there is no negative size to reject. */
+	if (size > 0 && buffer == NULL) {
+		gossip_err("%s: bogus NULL pointers\n", __func__);
+		return -EINVAL;
+	}
+
+	down_read(&pvfs2_inode->xattr_sem);
+	new_op = op_alloc(PVFS2_VFS_OP_LISTXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	if (buffer && size > 0)
+		memset(buffer, 0, size);
+
+try_again:
+	/* key_size is the byte offset of the current key in this response. */
+	key_size = 0;
+	new_op->upcall.req.listxattr.refn = pvfs2_inode->refn;
+	new_op->upcall.req.listxattr.token = token;
+	new_op->upcall.req.listxattr.requested_count =
+	    (size == 0) ? 0 : PVFS_MAX_XATTR_LISTLEN;
+	ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+	if (ret != 0)
+		goto done;
+
+	if (size == 0) {
+		/*
+		 * This is a bit of a big upper limit, but I did not want to
+		 * spend too much time getting this correct, since users end
+		 * up allocating memory rather than us...
+		 */
+		total = new_op->downcall.resp.listxattr.returned_count *
+			PVFS_MAX_XATTR_NAMELEN;
+		goto done;
+	}
+
+	length = new_op->downcall.resp.listxattr.keylen;
+	if (length == 0)
+		goto done;
+
+	/*
+	 * Check to see how much can be fit in the buffer. Fit only whole keys.
+	 */
+	for (i = 0; i < new_op->downcall.resp.listxattr.returned_count; i++) {
+		/* Stop, keeping what was copied so far, once a key won't fit. */
+		if (total + new_op->downcall.resp.listxattr.lengths[i] > size)
+			goto done;
+
+		cur_key = new_op->downcall.resp.listxattr.key + key_size;
+
+		/*
+		 * Since many dumb programs try to setxattr() on our reserved
+		 * xattrs this is a feeble attempt at defeating those by not
+		 * listing them in the output of listxattr.. sigh
+		 *
+		 * Despite its name, is_reserved_key() returns non-zero for
+		 * the keys that are NOT reserved, i.e. the ones to copy out.
+		 */
+		if (is_reserved_key(cur_key,
+				    new_op->downcall.resp.listxattr.lengths[i])) {
+			gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
+				     i, cur_key);
+			memcpy(buffer + total,
+			       cur_key,
+			       new_op->downcall.resp.listxattr.lengths[i]);
+			total += new_op->downcall.resp.listxattr.lengths[i];
+			count_keys++;
+		} else {
+			gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
+				     i, cur_key);
+		}
+		key_size += new_op->downcall.resp.listxattr.lengths[i];
+	}
+
+	/*
+	 * Since the buffer was large enough, we might have to continue
+	 * fetching more keys!
+	 */
+	token = new_op->downcall.resp.listxattr.token;
+	if (token != PVFS_ITERATE_END)
+		goto try_again;
+
+done:
+	gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d"
+		     " [size of buffer %ld] (filled in %d keys)\n",
+		     __func__,
+		     ret ? (int)ret : (int)total,
+		     (long)size,
+		     count_keys);
+	op_release(new_op);
+	/* On success the result is the byte count accumulated in total. */
+	if (ret == 0)
+		ret = total;
+out_unlock:
+	up_read(&pvfs2_inode->xattr_sem);
+	return ret;
+}
+
+int pvfs2_xattr_set_default(struct dentry *dentry,
+			    const char *name,
+			    const void *buffer,
+			    size_t size,
+			    int flags,
+			    int handler_flags)
+{
+	return pvfs2_inode_setxattr(dentry->d_inode,
+				    PVFS2_XATTR_NAME_DEFAULT_PREFIX,
+				    name,
+				    buffer,
+				    size,
+				    flags);
+}
+
+int pvfs2_xattr_get_default(struct dentry *dentry,
+			    const char *name,
+			    void *buffer,
+			    size_t size,
+			    int handler_flags)
+{
+	return pvfs2_inode_getxattr(dentry->d_inode,
+				    PVFS2_XATTR_NAME_DEFAULT_PREFIX,
+				    name,
+				    buffer,
+				    size);
+
+}
+
+static int pvfs2_xattr_set_trusted(struct dentry *dentry,
+				   const char *name,
+				   const void *buffer,
+				   size_t size,
+				   int flags,
+				   int handler_flags)
+{
+	return pvfs2_inode_setxattr(dentry->d_inode,
+				    PVFS2_XATTR_NAME_TRUSTED_PREFIX,
+				    name,
+				    buffer,
+				    size,
+				    flags);
+}
+
+static int pvfs2_xattr_get_trusted(struct dentry *dentry,
+				   const char *name,
+				   void *buffer,
+				   size_t size,
+				   int handler_flags)
+{
+	return
pvfs2_inode_getxattr(dentry->d_inode, + PVFS2_XATTR_NAME_TRUSTED_PREFIX, + name, + buffer, + size); +} + +static struct xattr_handler pvfs2_xattr_trusted_handler = { + .prefix = PVFS2_XATTR_NAME_TRUSTED_PREFIX, + .get = pvfs2_xattr_get_trusted, + .set = pvfs2_xattr_set_trusted, +}; + +static struct xattr_handler pvfs2_xattr_default_handler = { + /* + * NOTE: this is set to be the empty string. + * so that all un-prefixed xattrs keys get caught + * here! + */ + .prefix = PVFS2_XATTR_NAME_DEFAULT_PREFIX, + .get = pvfs2_xattr_get_default, + .set = pvfs2_xattr_set_default, +}; + +const struct xattr_handler *pvfs2_xattr_handlers[] = { + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, + &pvfs2_xattr_trusted_handler, + &pvfs2_xattr_default_handler, + NULL +}; From 74a552a133ab4c9673476d26edadae13f5d54408 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 17 Jul 2015 10:38:16 -0400 Subject: [PATCH 006/174] Orangefs: kernel client part 6 Signed-off-by: Mike Marshall --- Documentation/ABI/stable/sysfs-fs-orangefs | 87 +++++++++++++ Documentation/filesystems/orangefs.txt | 137 +++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 Documentation/ABI/stable/sysfs-fs-orangefs create mode 100644 Documentation/filesystems/orangefs.txt diff --git a/Documentation/ABI/stable/sysfs-fs-orangefs b/Documentation/ABI/stable/sysfs-fs-orangefs new file mode 100644 index 000000000000..affdb114bd33 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-fs-orangefs @@ -0,0 +1,87 @@ +What: /sys/fs/orangefs/perf_counters/* +Date: Jun 2015 +Contact: Mike Marshall +Description: + Counters and settings for various caches. + Read only. + + +What: /sys/fs/orangefs/perf_counter_reset +Date: June 2015 +Contact: Mike Marshall +Description: + echo a 0 or a 1 into perf_counter_reset to + reset all the counters in + /sys/fs/orangefs/perf_counters + except ones with PINT_PERF_PRESERVE set. 
+ + +What: /sys/fs/orangefs/perf_time_interval_secs +Date: Jun 2015 +Contact: Mike Marshall +Description: + Length of perf counter intervals in + seconds. + + +What: /sys/fs/orangefs/perf_history_size +Date: Jun 2015 +Contact: Mike Marshall +Description: + The perf_counters cache statistics have N, or + perf_history_size, samples. The default is + one. + + Every perf_time_interval_secs the (first) + samples are reset. + + If N is greater than one, the "current" set + of samples is reset, and the samples from the + other N-1 intervals remain available. + + +What: /sys/fs/orangefs/op_timeout_secs +Date: Jun 2015 +Contact: Mike Marshall +Description: + Service operation timeout in seconds. + + +What: /sys/fs/orangefs/slot_timeout_secs +Date: Jun 2015 +Contact: Mike Marshall +Description: + "Slot" timeout in seconds. A "slot" + is an indexed buffer in the shared + memory segment used for communication + between the kernel module and userspace. + Slots are requested and waited for, + the wait times out after slot_timeout_secs. + + +What: /sys/fs/orangefs/acache/* +Date: Jun 2015 +Contact: Mike Marshall +Description: + Attribute cache configurable settings. + + +What: /sys/fs/orangefs/ncache/* +Date: Jun 2015 +Contact: Mike Marshall +Description: + Name cache configurable settings. + + +What: /sys/fs/orangefs/capcache/* +Date: Jun 2015 +Contact: Mike Marshall +Description: + Capability cache configurable settings. + + +What: /sys/fs/orangefs/ccache/* +Date: Jun 2015 +Contact: Mike Marshall +Description: + Credential cache configurable settings. diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt new file mode 100644 index 000000000000..ec9c8416427e --- /dev/null +++ b/Documentation/filesystems/orangefs.txt @@ -0,0 +1,137 @@ +ORANGEFS +======== + +OrangeFS is an LGPL userspace scale-out parallel storage system. It is ideal +for large storage problems faced by HPC, BigData, Streaming Video, +Genomics, Bioinformatics. 
+ +Orangefs, originally called PVFS, was first developed in 1993 by +Walt Ligon and Eric Blumer as a parallel file system for Parallel +Virtual Machine (PVM) as part of a NASA grant to study the I/O patterns +of parallel programs. + +Orangefs features include: + + * Distributes file data among multiple file servers + * Supports simultaneous access by multiple clients + * Stores file data and metadata on servers using local file system + and access methods + * Userspace implementation is easy to install and maintain + * Direct MPI support + * Stateless + + +MAILING LIST +============ + +http://beowulf-underground.org/mailman/listinfo/pvfs2-users + + +DOCUMENTATION +============= + +http://www.orangefs.org/documentation/ + + +USERSPACE FILESYSTEM SOURCE +=========================== + +http://www.orangefs.org/download + +Orangefs versions prior to 2.9.3 would not be compatible with the +upstream version of the kernel client. + + +BUILDING THE USERSPACE FILESYSTEM ON A SINGLE SERVER +==================================================== + +When Orangefs is upstream, "--with-kernel" shouldn't be needed, but +until then the path to where the kernel with the Orangefs kernel client +patch was built is needed to ensure that pvfs2-client-core (the bridge +between kernel space and user space) will build properly. You can omit +--prefix if you don't care that things are sprinkled around in +/usr/local. + +./configure --prefix=/opt/ofs --with-kernel=/path/to/orangefs/kernel + +make + +make install + +Create an orangefs config file: +/opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf + + for "Enter hostnames", use the hostname, don't let it default to + localhost. 
+
+create a pvfs2tab file in /etc:
+cat /etc/pvfs2tab
+tcp://myhostname:3334/orangefs /mymountpoint pvfs2 defaults,noauto 0 0
+
+create the mount point you specified in the tab file if needed:
+mkdir /mymountpoint
+
+bootstrap the server:
+/opt/ofs/sbin/pvfs2-server /etc/pvfs2.conf -f
+
+start the server:
+/opt/ofs/sbin/pvfs2-server /etc/pvfs2.conf
+
+Now the server is running. At this point you might like to
+prove things are working with:
+
+/opt/ofs/bin/pvfs2-ls /mymountpoint
+
+You might not want to enforce selinux, it doesn't seem to matter by
+linux 3.11...
+
+If stuff seems to be working, turn on the client core:
+/opt/ofs/sbin/pvfs2-client -p /opt/ofs/sbin/pvfs2-client-core
+
+Mount your filesystem.
+mount -t pvfs2 tcp://myhostname:3334/orangefs /mymountpoint
+
+
+OPTIONS
+=======
+
+The following mount options are accepted:
+
+  acl
+    Allow the use of Access Control Lists on files and directories.
+
+  intr
+    Some operations between the kernel client and the user space
+    filesystem can be interruptible, such as changes in debug levels
+    and the setting of tunable parameters.
+
+  local_lock
+    Enable posix locking from the perspective of "this" kernel. The
+    default file_operations lock action is to return ENOSYS. Posix
+    locking kicks in if the filesystem is mounted with -o local_lock.
+    Distributed locking is being worked on for the future.
+
+
+DEBUGGING
+=========
+
+If you want the debug (GOSSIP) statements in a particular
+source file (inode.c for example) go to syslog:
+
+  echo inode > /sys/kernel/debug/orangefs/kernel-debug
+
+No debugging (the default):
+
+  echo none > /sys/kernel/debug/orangefs/kernel-debug
+
+Debugging from several source files:
+
+  echo inode,dir > /sys/kernel/debug/orangefs/kernel-debug
+
+All debugging:
+
+  echo all > /sys/kernel/debug/orangefs/kernel-debug
+
+Get a list of all debugging keywords:
+
+  cat /sys/kernel/debug/orangefs/debug-help
From 07f01962e3d37bd2c5bbcdf87f29c9fe78feb6e0 Mon Sep 17 00:00:00 2001
From: Mike Marshall
Date: Fri, 17 Jul 2015 10:38:17 -0400
Subject: [PATCH 007/174] Orangefs: kernel client part 7

Signed-off-by: Mike Marshall
---
 fs/Kconfig | 1 +
 fs/Makefile | 1 +
 fs/orangefs/Kconfig | 6 ++++++
 fs/orangefs/Makefile | 10 ++++++++++
 4 files changed, 18 insertions(+)
 create mode 100644 fs/orangefs/Kconfig
 create mode 100644 fs/orangefs/Makefile

diff --git a/fs/Kconfig b/fs/Kconfig
index da3f32f1a4e4..7d1bef1999af 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -190,6 +190,7 @@ menuconfig MISC_FILESYSTEMS
 
 if MISC_FILESYSTEMS
 
+source "fs/orangefs/Kconfig"
 source "fs/adfs/Kconfig"
 source "fs/affs/Kconfig"
 source "fs/ecryptfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index f79cf4043e60..b4406d6f7da1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_AUTOFS4_FS) += autofs4/
 obj-$(CONFIG_ADFS_FS) += adfs/
 obj-$(CONFIG_FUSE_FS) += fuse/
 obj-$(CONFIG_OVERLAY_FS) += overlayfs/
+obj-$(CONFIG_ORANGEFS_FS) += orangefs/
 obj-$(CONFIG_UDF_FS) += udf/
 obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
 obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig
new file mode 100644
index 000000000000..1554c02489de
--- /dev/null
+++ b/fs/orangefs/Kconfig
@@ -0,0 +1,6 @@
+config ORANGEFS_FS
+	tristate "ORANGEFS (Powered by PVFS) support"
+	select FS_POSIX_ACL
+	help
+	   Orange is a parallel file system designed for use on
high end + computing (HEC) systems. diff --git a/fs/orangefs/Makefile b/fs/orangefs/Makefile new file mode 100644 index 000000000000..828b36a6916d --- /dev/null +++ b/fs/orangefs/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for the ORANGEFS filesystem. +# + +obj-$(CONFIG_ORANGEFS_FS) += orangefs.o + +orangefs-objs := acl.o file.o pvfs2-cache.o pvfs2-utils.o xattr.o dcache.o \ + inode.o pvfs2-sysfs.o pvfs2-mod.o super.o devpvfs2-req.o \ + namei.o symlink.o dir.o pvfs2-bufmap.o \ + pvfs2-debugfs.o waitqueue.o From 2c590d5fb6987e6579a82285b742a318cc1fdb50 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 24 Jul 2015 10:37:15 -0400 Subject: [PATCH 008/174] Orangefs: kernel client update 1. Stephen Rothwell noticed that orangefs would not compile on powerpc... Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 17 ++++++++++++----- fs/orangefs/protocol.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index 3e450228f3dc..7e60fd047f28 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -36,7 +36,7 @@ do { \ static int hash_func(__u64 tag, int table_size) { - return tag % ((unsigned int)table_size); + return do_div(tag, (unsigned int)table_size); } static void pvfs2_devreq_add_op(struct pvfs2_kernel_op_s *op) @@ -279,7 +279,7 @@ static ssize_t pvfs2_devreq_writev(struct file *file, /* Either there is a trailer or there isn't */ if (count != notrailer_count && count != (notrailer_count + 1)) { - gossip_err("Error: Number of iov vectors is (%ld) and notrailer count is %d\n", + gossip_err("Error: Number of iov vectors is (%zu) and notrailer count is %d\n", count, notrailer_count); return -EPROTO; @@ -356,7 +356,7 @@ static ssize_t pvfs2_devreq_writev(struct file *file, "writev: trailer size %ld\n", (unsigned long)op->downcall.trailer_size); if (count != (notrailer_count + 1)) { - gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? 
(%ld)\n", (unsigned long)op->downcall.trailer_size, count); + gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%zu)\n", (unsigned long)op->downcall.trailer_size, count); dev_req_release(buffer); put_op(op); return -EPROTO; @@ -908,6 +908,14 @@ static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd, return dispatch_ioctl_command(cmd, arg); } +#endif /* CONFIG_COMPAT is in .config */ + +/* + * The following two ioctl32 functions had been refactored into the above + * CONFIG_COMPAT ifdef, but that was an over simplification that was + * not noticed until we tried to compile on power pc... + */ +#if (defined(CONFIG_COMPAT) && !defined(HAVE_REGISTER_IOCTL32_CONVERSION)) || !defined(CONFIG_COMPAT) static int pvfs2_ioctl32_init(void) { return 0; @@ -917,8 +925,7 @@ static void pvfs2_ioctl32_cleanup(void) { return; } - -#endif /* CONFIG_COMPAT is in .config */ +#endif /* the assigned character device major number */ static int pvfs2_dev_major; diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 2fb3a63ae9ab..8e0c8a6158f7 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -1,6 +1,7 @@ #include #include #include +#include extern struct client_debug_mask *cdm_array; extern char *debug_help_string; From 84d02150dea7571dc32176e35d65eecde82631a9 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 28 Jul 2015 13:27:51 -0400 Subject: [PATCH 009/174] Orangefs: sooth most sparse complaints Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 2 +- fs/orangefs/file.c | 13 ++++++++----- fs/orangefs/inode.c | 2 +- fs/orangefs/protocol.h | 12 +----------- fs/orangefs/pvfs2-bufmap.c | 10 +++++++--- fs/orangefs/pvfs2-debugfs.c | 2 +- fs/orangefs/pvfs2-kernel.h | 2 +- fs/orangefs/pvfs2-mod.c | 1 - fs/orangefs/pvfs2-sysfs.c | 4 ++-- fs/orangefs/super.c | 8 ++++---- fs/orangefs/waitqueue.c | 7 +++++++ 11 files changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/orangefs/devpvfs2-req.c 
b/fs/orangefs/devpvfs2-req.c index 7e60fd047f28..13878cac49ed 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -857,7 +857,7 @@ static unsigned long translate_dev_map26(unsigned long args, long *error) */ struct PVFS_dev_map_desc __user *p = compat_alloc_user_space(sizeof(*p)); - u32 addr; + compat_uptr_t addr; *error = 0; /* get the ptr from the 32 bit user-space */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 8e26f9fac289..4ba1b6c48aa7 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -799,7 +799,7 @@ out: /* * Perform a miscellaneous operation on a file. */ -long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret = -ENOTTY; __u64 val = 0; @@ -885,7 +885,7 @@ static int pvfs2_file_mmap(struct file *file, struct vm_area_struct *vma) * * \note Not called when each file is closed. */ -int pvfs2_file_release(struct inode *inode, struct file *file) +static int pvfs2_file_release(struct inode *inode, struct file *file) { gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_file_release: called on %s\n", @@ -909,7 +909,10 @@ int pvfs2_file_release(struct inode *inode, struct file *file) /* * Push all data for a specific file onto permanent storage. 
*/ -int pvfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync) +static int pvfs2_fsync(struct file *file, + loff_t start, + loff_t end, + int datasync) { int ret = -EINVAL; struct pvfs2_inode_s *pvfs2_inode = @@ -947,7 +950,7 @@ int pvfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync) * Future upgrade could support SEEK_DATA and SEEK_HOLE but would * require much changes to the FS */ -loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) { int ret = -EINVAL; struct inode *inode = file->f_path.dentry->d_inode; @@ -989,7 +992,7 @@ loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) * Support local locks (locks that only this kernel knows about) * if Orangefs was mounted -o local_lock. */ -int pvfs2_lock(struct file *filp, int cmd, struct file_lock *fl) +static int pvfs2_lock(struct file *filp, int cmd, struct file_lock *fl) { int rc = -ENOLCK; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index feda00fcdd7d..9ff6b2985240 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -33,7 +33,7 @@ static int read_one_page(struct page *page) loff_t blockptr_offset = (((loff_t) page->index) << blockbits); bytes_read = pvfs2_inode_read(inode, - page_data, + (char __user *) page_data, blocksize, &blockptr_offset, inode->i_size); diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 8e0c8a6158f7..ca7cef9590d3 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -341,7 +341,7 @@ __s32 PINT_non_errno_mapping[] = { \ extern __s32 PINT_errno_mapping[]; \ extern __s32 PINT_non_errno_mapping[]; \ extern const char *PINT_non_errno_strerror_mapping[]; \ -__s32 PVFS_get_errno_mapping(__s32 error) \ +static __s32 PVFS_get_errno_mapping(__s32 error) \ { \ __s32 ret = error, mask = 0; \ __s32 positive = ((error > -1) ? 
1 : 0); \ @@ -364,16 +364,6 @@ __s32 PVFS_get_errno_mapping(__s32 error) \ } \ return ret; \ } \ -__s32 PVFS_errno_to_error(int err) \ -{ \ - __s32 e = 0; \ - \ - for (; e < PVFS_ERRNO_MAX; ++e) \ - if (PINT_errno_mapping[e] == err) \ - return e | PVFS_ERROR_BIT; \ - \ - return err; \ -} \ DECLARE_ERRNO_MAPPING() /* permission bits */ diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index aa14c37d0216..a439163f8d7c 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -9,7 +9,7 @@ DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq); -struct pvfs2_bufmap { +static struct pvfs2_bufmap { atomic_t refcnt; int desc_size; @@ -663,6 +663,7 @@ int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, int to_page_index = 0; void *to_kaddr = NULL; void *from_kaddr = NULL; + struct kvec *iv = NULL; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *to; unsigned int seg; @@ -708,9 +709,10 @@ int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, * buffer into the mapped buffer one page at a time though */ while (amt_copied < size) { - struct iovec *iv = &copied_iovec[seg]; int inc_to_page_index; + iv = (struct kvec *) &copied_iovec[seg]; + if (iv->iov_len < (PAGE_SIZE - to_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); @@ -885,6 +887,7 @@ int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, int from_page_index = 0; void *from_kaddr = NULL; void *to_kaddr = NULL; + struct kvec *iv; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *from; unsigned int seg; @@ -930,9 +933,10 @@ int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, * but make sure that we do so one page at a time. 
*/ while (amt_copied < size) { - struct iovec *iv = &copied_iovec[seg]; int inc_from_page_index; + iv = (struct kvec *) &copied_iovec[seg]; + if (iv->iov_len < (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size - amt_copied); diff --git a/fs/orangefs/pvfs2-debugfs.c b/fs/orangefs/pvfs2-debugfs.c index 8d118da9b88f..ba5bfef7a3f3 100644 --- a/fs/orangefs/pvfs2-debugfs.c +++ b/fs/orangefs/pvfs2-debugfs.c @@ -70,7 +70,7 @@ static const struct seq_operations help_debug_ops = { * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and * ORANGEFS_KMOD_DEBUG_FILE. */ -DEFINE_MUTEX(orangefs_debug_lock); +static DEFINE_MUTEX(orangefs_debug_lock); int orangefs_debug_open(struct inode *, struct file *); diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 6c787c4797d0..be30111b40d2 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -608,7 +608,7 @@ struct inode *pvfs2_iget(struct super_block *sb, struct pvfs2_object_kref *ref); ssize_t pvfs2_inode_read(struct inode *inode, - char *buf, + char __user *buf, size_t count, loff_t *offset, loff_t readahead_size); diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/pvfs2-mod.c index 9cbc992731d6..69289c5d838c 100644 --- a/fs/orangefs/pvfs2-mod.c +++ b/fs/orangefs/pvfs2-mod.c @@ -47,7 +47,6 @@ struct client_debug_mask client_debug_mask = { NULL, 0, 0 }; unsigned int kernel_mask_set_mod_init; /* implicitly false */ int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS; int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS; -__u32 DEBUG_LINE = 50; MODULE_LICENSE("GPL"); MODULE_AUTHOR("PVFS2 Development Team"); diff --git a/fs/orangefs/pvfs2-sysfs.c b/fs/orangefs/pvfs2-sysfs.c index 6d0e18b7239f..ea635b5e431b 100644 --- a/fs/orangefs/pvfs2-sysfs.c +++ b/fs/orangefs/pvfs2-sysfs.c @@ -750,7 +750,7 @@ out: /* * obtain attribute values from userspace with a service operation. 
*/ -int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) +static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) { struct pvfs2_kernel_op_s *new_op = NULL; int rc = 0; @@ -1023,7 +1023,7 @@ static ssize_t * We want to return 1 if we think everything went OK, and * EINVAL if not. */ -int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) +static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) { struct pvfs2_kernel_op_s *new_op = NULL; int val = 0; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index a854390fc0ea..90c7a1c9f201 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -269,7 +269,7 @@ static void pvfs2_dirty_inode(struct inode *inode, int flags) SetAtimeFlag(pvfs2_inode); } -struct super_operations pvfs2_s_ops = { +static const struct super_operations pvfs2_s_ops = { .alloc_inode = pvfs2_alloc_inode, .destroy_inode = pvfs2_destroy_inode, .dirty_inode = pvfs2_dirty_inode, @@ -279,7 +279,7 @@ struct super_operations pvfs2_s_ops = { .show_options = generic_show_options, }; -struct dentry *pvfs2_fh_to_dentry(struct super_block *sb, +static struct dentry *pvfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) @@ -299,7 +299,7 @@ struct dentry *pvfs2_fh_to_dentry(struct super_block *sb, return d_obtain_alias(pvfs2_iget(sb, &refn)); } -int pvfs2_encode_fh(struct inode *inode, +static int pvfs2_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) @@ -347,7 +347,7 @@ static struct export_operations pvfs2_export_ops = { .fh_to_dentry = pvfs2_fh_to_dentry, }; -int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) +static int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) { int ret = -EINVAL; struct inode *root = NULL; diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 9b32286a7dc4..ad79e534dc8e 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -314,6 +314,13 @@ void 
pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op) spin_unlock(&op->lock); gossip_err("interrupted operation is in a weird state 0x%x\n", op->op_state); + } else { + /* + * It is not intended for execution to flow here, + * but having this unlock here makes sparse happy. + */ + gossip_err("%s: can't get here.\n", __func__); + spin_unlock(&op->lock); } } From eeaa3d448c5d35ad0dc16a981aacd64139c53eee Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 29 Jul 2015 13:36:37 -0400 Subject: [PATCH 010/174] Orangefs: address problems found by static checker Don't check for negative rc from boolean. Don't pointlessly initialize variables, it short-circuits gcc's uninitialized variable warnings. And max_new_nr_segs can never be zero, so don't check for it. Preserve original kstrdup pointer for freeing later. Don't check for negative value in unsigned variable. Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 16 ---------------- fs/orangefs/file.c | 18 ++++++------------ fs/orangefs/pvfs2-utils.c | 4 +++- fs/orangefs/xattr.c | 6 ++---- 4 files changed, 11 insertions(+), 33 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 9b5f4bb17874..c126c0fc6e0f 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -104,7 +104,6 @@ static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, * * \param dir_emit callback function called for each entry read. 
* - * \retval <0 on error * \retval 0 when directory has been completely traversed * \retval >0 if we don't call dir_emit for all entries * @@ -253,8 +252,6 @@ get_new_buffer_index: __func__, llu(pos)); ret = dir_emit(ctx, ".", 1, ino, DT_DIR); - if (ret < 0) - goto out_destroy_handle; ctx->pos++; gossip_ldebug(GOSSIP_DIR_DEBUG, "%s: ctx->pos:%lld\n", @@ -270,8 +267,6 @@ get_new_buffer_index: __func__, llu(pos)); ret = dir_emit(ctx, "..", 2, ino, DT_DIR); - if (ret < 0) - goto out_destroy_handle; ctx->pos++; gossip_ldebug(GOSSIP_DIR_DEBUG, "%s: ctx->pos:%lld\n", @@ -293,17 +288,6 @@ get_new_buffer_index: (unsigned long)pos); ret = dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); - if (ret < 0) { - gossip_debug(GOSSIP_DIR_DEBUG, - "dir_emit() failed. ret:%d\n", - ret); - if (i < 2) { - gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n"); - gossip_err("Duplicate entries may appear.\n"); - } - buffer_full = 1; - break; - } ctx->pos++; gossip_ldebug(GOSSIP_DIR_DEBUG, "%s: ctx->pos:%lld\n", diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 4ba1b6c48aa7..013a07c8bdfd 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -463,12 +463,12 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, unsigned int to_free; size_t count; unsigned long seg; - unsigned long new_nr_segs = 0; - unsigned long max_new_nr_segs = 0; - unsigned long seg_count = 0; - unsigned long *seg_array = NULL; - struct iovec *iovecptr = NULL; - struct iovec *ptr = NULL; + unsigned long new_nr_segs; + unsigned long max_new_nr_segs; + unsigned long seg_count; + unsigned long *seg_array; + struct iovec *iovecptr; + struct iovec *ptr; total_count = 0; ret = -EINVAL; @@ -477,12 +477,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, /* Compute total and max number of segments after split */ max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count); - if (max_new_nr_segs < 0) { - gossip_lerr("%s: could 
not bound iovec %lu\n", - __func__, - max_new_nr_segs); - goto out; - } gossip_debug(GOSSIP_FILE_DEBUG, "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 107f425d2e90..8d4411ca118f 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -1077,6 +1077,7 @@ void debug_string_to_mask(char *debug_string, void *mask, int type) char *unchecked_keyword; int i; char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL); + char *original_pointer; int element_count = 0; struct client_debug_mask *c_mask; __u64 *k_mask; @@ -1092,6 +1093,7 @@ void debug_string_to_mask(char *debug_string, void *mask, int type) element_count = num_kmod_keyword_mask_map; } + original_pointer = strsep_fodder; while ((unchecked_keyword = strsep(&strsep_fodder, ","))) if (strlen(unchecked_keyword)) { for (i = 0; i < element_count; i++) @@ -1105,7 +1107,7 @@ void debug_string_to_mask(char *debug_string, void *mask, int type) &k_mask); } - kfree(strsep_fodder); + kfree(original_pointer); } void do_c_mask(int i, diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 2766090f5ca4..227eaa47b1e1 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -77,10 +77,8 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n"); return -EINVAL; } - if (size < 0 || - (strlen(name) + strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN) { - gossip_err("Invalid size (%d) or key length (%d)\n", - (int)size, + if ((strlen(name) + strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN) { + gossip_err("Invalid key length (%d)\n", (int)(strlen(name) + strlen(prefix))); return -EINVAL; } From c36316b74e0d98fdc5492104b5dd8bde697f0f81 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 30 Jul 2015 15:34:04 -0400 Subject: [PATCH 011/174] Orangefs: large integer implicitly truncated to unsigned type make.cross ARCH=tile doesn't like "inode->i_bytes = PAGE_CACHE_SIZE;", 
so cast PAGE_CACHE_SIZE to unsigned short. Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 8d4411ca118f..6eecb1861aca 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -163,7 +163,7 @@ static int copy_attributes_to_inode(struct inode *inode, /*FALLTHRU*/ default: pvfs2_lock_inode(inode); - inode->i_bytes = PAGE_CACHE_SIZE; + inode->i_bytes = (unsigned short)PAGE_CACHE_SIZE; inode->i_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512); pvfs2_unlock_inode(inode); From f0566532fe19c7f75a3c0c908152ea0e24e6ead2 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 5 Aug 2015 13:46:28 -0400 Subject: [PATCH 012/174] Orangefs: use inode_set_bytes for directories Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-utils.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 6eecb1861aca..11ee073ecd19 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -162,12 +162,11 @@ static int copy_attributes_to_inode(struct inode *inode, } /*FALLTHRU*/ default: - pvfs2_lock_inode(inode); - inode->i_bytes = (unsigned short)PAGE_CACHE_SIZE; - inode->i_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512); - pvfs2_unlock_inode(inode); - inode->i_size = PAGE_CACHE_SIZE; + + pvfs2_lock_inode(inode); + inode_set_bytes(inode, inode->i_size); + pvfs2_unlock_inode(inode); break; } From cb987f3cbe3ced82496c802565b263844abfb0b9 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 3 Aug 2015 00:08:59 -0400 Subject: [PATCH 013/174] fs: orangefs: remove execute privileges from module params This makes no sense and causes warnings on boot. 
Signed-off-by: Sasha Levin Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-mod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/pvfs2-mod.c index 69289c5d838c..d80537dadcd8 100644 --- a/fs/orangefs/pvfs2-mod.c +++ b/fs/orangefs/pvfs2-mod.c @@ -65,7 +65,7 @@ static struct file_system_type pvfs2_fs_type = { }; module_param(hash_table_size, int, 0); -module_param(module_parm_debug_mask, ulong, 0755); +module_param(module_parm_debug_mask, ulong, 0644); module_param(op_timeout_secs, int, 0); module_param(slot_timeout_secs, int, 0); From 81b784b11ea65c5c591f4d963daed2111a1b4280 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 1 Aug 2015 18:29:37 -0700 Subject: [PATCH 014/174] Orangefs: Swap order of include files spinlock_types.h requires types from linux/types.h. Including spinlock_types.h first may result in the following build errors, as seen with arm:allmodconfig. arch/arm/include/asm/spinlock_types.h:12:3: error: unknown type name 'u32' arch/arm/include/asm/spinlock_types.h:16:4: error: unknown type name 'u16' Fixes: deb4fb58ff73 ("Orangefs: kernel client part 2") Cc: Mark Brown Cc: Mike Marshall Signed-off-by: Guenter Roeck Signed-off-by: Mike Marshall --- fs/orangefs/protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index ca7cef9590d3..f571be21f66a 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -1,5 +1,5 @@ -#include #include +#include #include #include From 4d1c44043b26e99dd70f379cdbe80c64f43fd123 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 4 Sep 2015 10:31:16 -0400 Subject: [PATCH 015/174] Orangefs: use iov_iter interface replace opencoded pvfs_bufmap_copy_to_kernel_iovec, pvfs_bufmap_copy_to_user_iovec, pvfs_bufmap_copy_iovec_from_kernel, and pvfs_bufmap_copy_iovec_from_user with pvfs_bufmap_copy_to_iovec and pvfs_bufmap_copy_from_iovec, which both use the iov_iter interface. 
Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 77 +++--- fs/orangefs/pvfs2-bufmap.c | 474 +++---------------------------------- fs/orangefs/pvfs2-bufmap.h | 29 +-- 3 files changed, 71 insertions(+), 509 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 013a07c8bdfd..3e5fc1a2c82f 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -33,31 +33,30 @@ static int precopy_buffers(struct pvfs2_bufmap *bufmap, int buffer_index, const struct iovec *vec, unsigned long nr_segs, - size_t total_size, - int from_user) + size_t total_size) { int ret = 0; + struct iov_iter iter; /* * copy data from application/kernel by pulling it out * of the iovec. */ - /* Are we copying from User Virtual Addresses? */ - if (from_user) - ret = pvfs_bufmap_copy_iovec_from_user( - bufmap, - buffer_index, - vec, - nr_segs, - total_size); - /* Are we copying from Kernel Virtual Addresses? */ - else - ret = pvfs_bufmap_copy_iovec_from_kernel( - bufmap, - buffer_index, - vec, - nr_segs, - total_size); + + + if (total_size) { + iov_iter_init(&iter, WRITE, vec, nr_segs, total_size); + ret = pvfs_bufmap_copy_from_iovec(bufmap, + &iter, + buffer_index, + total_size); + if (ret < 0) + gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", + __func__, + (long)ret); + + } + if (ret < 0) gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", __func__, @@ -76,35 +75,24 @@ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, int buffer_index, const struct iovec *vec, int nr_segs, - size_t total_size, - int to_user) + size_t total_size) { int ret = 0; + struct iov_iter iter; + /* * copy data to application/kernel by pushing it out to * the iovec. NOTE; target buffers can be addresses or * struct page pointers. */ if (total_size) { - /* Are we copying to User Virtual Addresses? 
*/ - if (to_user) - ret = pvfs_bufmap_copy_to_user_iovec( - bufmap, - buffer_index, - vec, - nr_segs, - total_size); - /* Are we copying to Kern Virtual Addresses? */ - else - ret = pvfs_bufmap_copy_to_kernel_iovec( - bufmap, - buffer_index, - vec, - nr_segs, - total_size); + iov_iter_init(&iter, READ, vec, nr_segs, total_size); + ret = pvfs_bufmap_copy_to_iovec(bufmap, + &iter, + buffer_index); if (ret < 0) - gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", + gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", __func__, (long)ret); } @@ -116,7 +104,7 @@ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, */ static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode, loff_t *offset, struct iovec *vec, unsigned long nr_segs, - size_t total_size, loff_t readahead_size, int to_user) + size_t total_size, loff_t readahead_size) { struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; @@ -158,10 +146,9 @@ populate_shared_memory: new_op->upcall.req.io.offset = *offset; gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU): copy_to_user %d nr_segs %lu, offset: %llu total_size: %zd\n", + "%s(%pU): nr_segs %lu, offset: %llu total_size: %zd\n", __func__, handle, - to_user, nr_segs, llu(*offset), total_size); @@ -174,8 +161,7 @@ populate_shared_memory: buffer_index, vec, nr_segs, - total_size, - to_user); + total_size); if (ret < 0) goto out; } @@ -239,8 +225,7 @@ populate_shared_memory: buffer_index, vec, nr_segs, - new_op->downcall.resp.io.amt_complete, - to_user); + new_op->downcall.resp.io.amt_complete); if (ret < 0) { /* * put error codes in downcall so that handle_io_error() @@ -606,7 +591,7 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, (int)*offset); ret = wait_for_direct_io(type, inode, offset, ptr, - seg_array[seg], each_count, 0, 1); + seg_array[seg], each_count, 
0); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): return from wait_for_io:%d\n", __func__, @@ -699,7 +684,7 @@ ssize_t pvfs2_inode_read(struct inode *inode, llu(*offset)); ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, &vec, 1, - count, readahead_size, 0); + count, readahead_size); if (ret > 0) *offset += ret; diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index a439163f8d7c..e01e220fd5d7 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -507,468 +507,60 @@ void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index) pvfs2_bufmap_unref(bufmap); } -/* - * pvfs_bufmap_copy_iovec_from_user() - * - * copies data from several user space address's in an iovec - * to a mapped buffer - * - * Note that the mapped buffer is a series of pages and therefore - * the copies have to be split by PAGE_SIZE bytes at a time. - * Note that this routine checks that summation of iov_len - * across all the elements of iov is equal to size. - * - * returns 0 on success, -errno on failure - */ -int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap, - int buffer_index, - const struct iovec *iov, - unsigned long nr_segs, - size_t size) +int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, + struct iov_iter *iter, + int buffer_index, + size_t size) { - size_t ret = 0; - size_t amt_copied = 0; - size_t cur_copy_size = 0; - unsigned int to_page_offset = 0; - unsigned int to_page_index = 0; - void *to_kaddr = NULL; - void __user *from_addr = NULL; - struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *to; - unsigned int seg; - char *tmp_printer = NULL; - int tmp_int = 0; + struct page *page; + size_t copied; + int i; gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs_bufmap_copy_iovec_from_user: index %d, " - "size %zd\n", - buffer_index, - size); + "%s: buffer_index:%d: size:%lu:\n", + __func__, buffer_index, size); to = &bufmap->desc_array[buffer_index]; - /* - * copy the passed in iovec so that we can change some of 
its fields - */ - copied_iovec = kmalloc_array(nr_segs, - sizeof(*copied_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (copied_iovec == NULL) - return -ENOMEM; - - memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); - /* - * Go through each segment in the iovec and make sure that - * the summation of iov_len matches the given size. - */ - for (seg = 0, amt_copied = 0; seg < nr_segs; seg++) - amt_copied += copied_iovec[seg].iov_len; - if (amt_copied != size) { - gossip_err( - "pvfs2_bufmap_copy_iovec_from_user: computed total (" - "%zd) is not equal to (%zd)\n", - amt_copied, - size); - kfree(copied_iovec); - return -EINVAL; + for (i = 0; size; i++) { + page = to->page_array[i]; + copied = copy_page_from_iter(page, 0, PAGE_SIZE, iter); + size -= copied; + if ((copied == 0) && (size)) + break; } - to_page_index = 0; - to_page_offset = 0; - amt_copied = 0; - seg = 0; - /* - * Go through each segment in the iovec and copy its - * buffer into the mapped buffer one page at a time though - */ - while (amt_copied < size) { - struct iovec *iv = &copied_iovec[seg]; - int inc_to_page_index; + return size ? 
-EFAULT : 0; - if (iv->iov_len < (PAGE_SIZE - to_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - from_addr = iv->iov_base; - inc_to_page_index = 0; - } else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - from_addr = iv->iov_base; - inc_to_page_index = 1; - } else { - cur_copy_size = - PVFS_util_min(PAGE_SIZE - to_page_offset, - size - amt_copied); - from_addr = iv->iov_base; - iv->iov_base += cur_copy_size; - iv->iov_len -= cur_copy_size; - inc_to_page_index = 1; - } - to_kaddr = pvfs2_kmap(to->page_array[to_page_index]); - ret = - copy_from_user(to_kaddr + to_page_offset, - from_addr, - cur_copy_size); - if (!PageReserved(to->page_array[to_page_index])) - SetPageDirty(to->page_array[to_page_index]); - - if (!tmp_printer) { - tmp_printer = (char *)(to_kaddr + to_page_offset); - tmp_int += tmp_printer[0]; - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "First character (integer value) in pvfs_bufmap_copy_from_user: %d\n", - tmp_int); - } - - pvfs2_kunmap(to->page_array[to_page_index]); - if (ret) { - gossip_err("Failed to copy data from user space\n"); - kfree(copied_iovec); - return -EFAULT; - } - - amt_copied += cur_copy_size; - if (inc_to_page_index) { - to_page_offset = 0; - to_page_index++; - } else { - to_page_offset += cur_copy_size; - } - } - kfree(copied_iovec); - return 0; } /* - * pvfs_bufmap_copy_iovec_from_kernel() + * Iterate through the array of pages containing the bytes from + * a file being read. * - * copies data from several kernel space address's in an iovec - * to a mapped buffer - * - * Note that the mapped buffer is a series of pages and therefore - * the copies have to be split by PAGE_SIZE bytes at a time. - * Note that this routine checks that summation of iov_len - * across all the elements of iov is equal to size. 
- * - * returns 0 on success, -errno on failure */ -int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, - int buffer_index, const struct iovec *iov, - unsigned long nr_segs, size_t size) +int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, + struct iov_iter *iter, + int buffer_index) { - size_t amt_copied = 0; - size_t cur_copy_size = 0; - int to_page_index = 0; - void *to_kaddr = NULL; - void *from_kaddr = NULL; - struct kvec *iv = NULL; - struct iovec *copied_iovec = NULL; - struct pvfs_bufmap_desc *to; - unsigned int seg; - unsigned to_page_offset = 0; - - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs_bufmap_copy_iovec_from_kernel: index %d, " - "size %zd\n", - buffer_index, - size); - - to = &bufmap->desc_array[buffer_index]; - /* - * copy the passed in iovec so that we can change some of its fields - */ - copied_iovec = kmalloc_array(nr_segs, - sizeof(*copied_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (copied_iovec == NULL) - return -ENOMEM; - - memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); - /* - * Go through each segment in the iovec and make sure that - * the summation of iov_len matches the given size. 
- */ - for (seg = 0, amt_copied = 0; seg < nr_segs; seg++) - amt_copied += copied_iovec[seg].iov_len; - if (amt_copied != size) { - gossip_err("pvfs2_bufmap_copy_iovec_from_kernel: computed total(%zd) is not equal to (%zd)\n", - amt_copied, - size); - kfree(copied_iovec); - return -EINVAL; - } - - to_page_index = 0; - amt_copied = 0; - seg = 0; - to_page_offset = 0; - /* - * Go through each segment in the iovec and copy its - * buffer into the mapped buffer one page at a time though - */ - while (amt_copied < size) { - int inc_to_page_index; - - iv = (struct kvec *) &copied_iovec[seg]; - - if (iv->iov_len < (PAGE_SIZE - to_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - from_kaddr = iv->iov_base; - inc_to_page_index = 0; - } else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - from_kaddr = iv->iov_base; - inc_to_page_index = 1; - } else { - cur_copy_size = - PVFS_util_min(PAGE_SIZE - to_page_offset, - size - amt_copied); - from_kaddr = iv->iov_base; - iv->iov_base += cur_copy_size; - iv->iov_len -= cur_copy_size; - inc_to_page_index = 1; - } - to_kaddr = pvfs2_kmap(to->page_array[to_page_index]); - memcpy(to_kaddr + to_page_offset, from_kaddr, cur_copy_size); - if (!PageReserved(to->page_array[to_page_index])) - SetPageDirty(to->page_array[to_page_index]); - pvfs2_kunmap(to->page_array[to_page_index]); - amt_copied += cur_copy_size; - if (inc_to_page_index) { - to_page_offset = 0; - to_page_index++; - } else { - to_page_offset += cur_copy_size; - } - } - kfree(copied_iovec); - return 0; -} - -/* - * pvfs_bufmap_copy_to_user_iovec() - * - * copies data to several user space address's in an iovec - * from a mapped buffer - * - * returns 0 on success, -errno on failure - */ -int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap, - int buffer_index, const struct iovec *iov, - unsigned long nr_segs, size_t size) -{ - size_t ret = 0; - 
size_t amt_copied = 0; - size_t cur_copy_size = 0; - int from_page_index = 0; - void *from_kaddr = NULL; - void __user *to_addr = NULL; - struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *from; - unsigned int seg; - unsigned from_page_offset = 0; - char *tmp_printer = NULL; - int tmp_int = 0; + struct page *page; + int i; + size_t written; gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs_bufmap_copy_to_user_iovec: index %d, size %zd\n", - buffer_index, - size); + "%s: buffer_index:%d: iov_iter_count(iter):%lu:\n", + __func__, buffer_index, iov_iter_count(iter)); - from = &bufmap->desc_array[buffer_index]; - /* - * copy the passed in iovec so that we can change some of its fields - */ - copied_iovec = kmalloc_array(nr_segs, - sizeof(*copied_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (copied_iovec == NULL) - return -ENOMEM; + from = &bufmap->desc_array[buffer_index]; - memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); - /* - * Go through each segment in the iovec and make sure that - * the summation of iov_len is greater than the given size. - */ - for (seg = 0, amt_copied = 0; seg < nr_segs; seg++) - amt_copied += copied_iovec[seg].iov_len; - if (amt_copied < size) { - gossip_err("pvfs2_bufmap_copy_to_user_iovec: computed total (%zd) is less than (%zd)\n", - amt_copied, - size); - kfree(copied_iovec); - return -EINVAL; + for (i = 0; iov_iter_count(iter); i++) { + page = from->page_array[i]; + written = copy_page_to_iter(page, 0, PAGE_SIZE, iter); + if ((written == 0) && (iov_iter_count(iter))) + break; } - from_page_index = 0; - amt_copied = 0; - seg = 0; - from_page_offset = 0; - /* - * Go through each segment in the iovec and copy from the mapper buffer, - * but make sure that we do so one page at a time. 
- */ - while (amt_copied < size) { - struct iovec *iv = &copied_iovec[seg]; - int inc_from_page_index; - - if (iv->iov_len < (PAGE_SIZE - from_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - to_addr = iv->iov_base; - inc_from_page_index = 0; - } else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - to_addr = iv->iov_base; - inc_from_page_index = 1; - } else { - cur_copy_size = - PVFS_util_min(PAGE_SIZE - from_page_offset, - size - amt_copied); - to_addr = iv->iov_base; - iv->iov_base += cur_copy_size; - iv->iov_len -= cur_copy_size; - inc_from_page_index = 1; - } - from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); - if (!tmp_printer) { - tmp_printer = (char *)(from_kaddr + from_page_offset); - tmp_int += tmp_printer[0]; - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n", - tmp_int); - } - ret = - copy_to_user(to_addr, - from_kaddr + from_page_offset, - cur_copy_size); - pvfs2_kunmap(from->page_array[from_page_index]); - if (ret) { - gossip_err("Failed to copy data to user space\n"); - kfree(copied_iovec); - return -EFAULT; - } - - amt_copied += cur_copy_size; - if (inc_from_page_index) { - from_page_offset = 0; - from_page_index++; - } else { - from_page_offset += cur_copy_size; - } - } - kfree(copied_iovec); - return 0; -} - -/* - * pvfs_bufmap_copy_to_kernel_iovec() - * - * copies data to several kernel space address's in an iovec - * from a mapped buffer - * - * returns 0 on success, -errno on failure - */ -int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, - int buffer_index, const struct iovec *iov, - unsigned long nr_segs, size_t size) -{ - size_t amt_copied = 0; - size_t cur_copy_size = 0; - int from_page_index = 0; - void *from_kaddr = NULL; - void *to_kaddr = NULL; - struct kvec *iv; - struct iovec *copied_iovec = NULL; - struct pvfs_bufmap_desc 
*from; - unsigned int seg; - unsigned int from_page_offset = 0; - - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs_bufmap_copy_to_kernel_iovec: index %d, size %zd\n", - buffer_index, - size); - - from = &bufmap->desc_array[buffer_index]; - /* - * copy the passed in iovec so that we can change some of its fields - */ - copied_iovec = kmalloc_array(nr_segs, - sizeof(*copied_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (copied_iovec == NULL) - return -ENOMEM; - - memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec)); - /* - * Go through each segment in the iovec and make sure that - * the summation of iov_len is greater than the given size. - */ - for (seg = 0, amt_copied = 0; seg < nr_segs; seg++) - amt_copied += copied_iovec[seg].iov_len; - - if (amt_copied < size) { - gossip_err("pvfs2_bufmap_copy_to_kernel_iovec: computed total (%zd) is less than (%zd)\n", - amt_copied, - size); - kfree(copied_iovec); - return -EINVAL; - } - - from_page_index = 0; - amt_copied = 0; - seg = 0; - from_page_offset = 0; - /* - * Go through each segment in the iovec and copy from the mapper buffer, - * but make sure that we do so one page at a time. 
- */ - while (amt_copied < size) { - int inc_from_page_index; - - iv = (struct kvec *) &copied_iovec[seg]; - - if (iv->iov_len < (PAGE_SIZE - from_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - to_kaddr = iv->iov_base; - inc_from_page_index = 0; - } else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) { - cur_copy_size = - PVFS_util_min(iv->iov_len, size - amt_copied); - seg++; - to_kaddr = iv->iov_base; - inc_from_page_index = 1; - } else { - cur_copy_size = - PVFS_util_min(PAGE_SIZE - from_page_offset, - size - amt_copied); - to_kaddr = iv->iov_base; - iv->iov_base += cur_copy_size; - iv->iov_len -= cur_copy_size; - inc_from_page_index = 1; - } - from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); - memcpy(to_kaddr, from_kaddr + from_page_offset, cur_copy_size); - pvfs2_kunmap(from->page_array[from_page_index]); - amt_copied += cur_copy_size; - if (inc_from_page_index) { - from_page_offset = 0; - from_page_index++; - } else { - from_page_offset += cur_copy_size; - } - } - kfree(copied_iovec); - return 0; + return iov_iter_count(iter) ? 
-EFAULT : 0; } diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/pvfs2-bufmap.h index e269deafbb74..a0f84c045d73 100644 --- a/fs/orangefs/pvfs2-bufmap.h +++ b/fs/orangefs/pvfs2-bufmap.h @@ -42,29 +42,14 @@ int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index); void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index); -int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap, - int buffer_index, - const struct iovec *iov, - unsigned long nr_segs, - size_t size); +int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, + struct iov_iter *iter, + int buffer_index, + size_t size); -int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, - int buffer_index, - const struct iovec *iov, - unsigned long nr_segs, - size_t size); - -int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap, - int buffer_index, - const struct iovec *iov, - unsigned long nr_segs, - size_t size); - -int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, - int buffer_index, - const struct iovec *iov, - unsigned long nr_segs, - size_t size); +int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, + struct iov_iter *iter, + int buffer_index); size_t pvfs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, struct iovec *iovec, From 88309aae3ddb62e6d02a8f1002a4f4fc41b423ad Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 23 Sep 2015 16:48:40 -0400 Subject: [PATCH 016/174] Orangefs: fix dir_emit code in pvfs2_readdir. Al Viro glanced at readdir and surmised that getdents would misbehave the way it was written... and sure enough. 
Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 131 +++++++++++++++-------------------------- fs/orangefs/protocol.h | 1 + 2 files changed, 50 insertions(+), 82 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index c126c0fc6e0f..3870e78f5ecf 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -95,26 +95,16 @@ static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, /* * Read directory entries from an instance of an open directory. - * - * \note This routine was converted for the readdir to iterate change - * in "struct file_operations". "converted" mostly amounts to - * changing occurrences of "readdir" and "filldir" in the - * comments to "iterate" and "dir_emit". Also filldir calls - * were changed to dir_emit calls. - * - * \param dir_emit callback function called for each entry read. - * - * \retval 0 when directory has been completely traversed - * \retval >0 if we don't call dir_emit for all entries - * - * \note If the dir_emit call-back returns non-zero, then iterate should - * assume that it has had enough, and should return as well. */ static int pvfs2_readdir(struct file *file, struct dir_context *ctx) { struct pvfs2_bufmap *bufmap = NULL; int ret = 0; int buffer_index; + /* + * ptoken supports Orangefs' distributed directory logic, added + * in 2.9.2. 
+ */ __u64 *ptoken = file->private_data; __u64 pos = 0; ino_t ino = 0; @@ -129,11 +119,11 @@ static int pvfs2_readdir(struct file *file, struct dir_context *ctx) char *current_entry = NULL; long bytes_decoded; - gossip_ldebug(GOSSIP_DIR_DEBUG, - "%s: ctx->pos:%lld, token = %llu\n", - __func__, - lld(ctx->pos), - llu(*ptoken)); + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: ctx->pos:%lld, ptoken = %llu\n", + __func__, + lld(ctx->pos), + llu(*ptoken)); pos = (__u64) ctx->pos; @@ -165,16 +155,6 @@ static int pvfs2_readdir(struct file *file, struct dir_context *ctx) __func__, &new_op->upcall.req.readdir.refn.khandle); - /* - * NOTE: the position we send to the readdir upcall is out of - * sync with ctx->pos since: - * 1. pvfs2 doesn't include the "." and ".." entries that are - * added below. - * 2. the introduction of distributed directory logic makes token no - * longer be related to f_pos and pos. Instead an independent - * variable is used inside the function and stored in the - * private_data of the file structure. - */ new_op->upcall.req.readdir.token = *ptoken; get_new_buffer_index: @@ -238,13 +218,18 @@ get_new_buffer_index: } if (bytes_decoded != new_op->downcall.trailer_size) { - gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n", - bytes_decoded, - (long)new_op->downcall.trailer_size); + gossip_err("pvfs2_readdir: # bytes decoded (%ld) " + "!= trailer size (%ld)\n", + bytes_decoded, + (long)new_op->downcall.trailer_size); ret = -EINVAL; goto out_destroy_handle; } + /* + * pvfs2 doesn't actually store dot and dot-dot, but + * we need to have them represented. 
+ */ if (pos == 0) { ino = get_ino_from_khandle(dentry->d_inode); gossip_debug(GOSSIP_DIR_DEBUG, @@ -252,12 +237,7 @@ get_new_buffer_index: __func__, llu(pos)); ret = dir_emit(ctx, ".", 1, ino, DT_DIR); - ctx->pos++; - gossip_ldebug(GOSSIP_DIR_DEBUG, - "%s: ctx->pos:%lld\n", - __func__, - lld(ctx->pos)); - pos++; + pos += 1; } if (pos == 1) { @@ -267,62 +247,55 @@ get_new_buffer_index: __func__, llu(pos)); ret = dir_emit(ctx, "..", 2, ino, DT_DIR); - ctx->pos++; - gossip_ldebug(GOSSIP_DIR_DEBUG, - "%s: ctx->pos:%lld\n", - __func__, - lld(ctx->pos)); - pos++; + pos += 1; } - for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) { + /* + * we stored PVFS_ITERATE_NEXT in ctx->pos last time around + * to prevent "finding" dot and dot-dot on any iteration + * other than the first. + */ + if (ctx->pos == PVFS_ITERATE_NEXT) + ctx->pos = 0; + + for (i = ctx->pos; + i < rhandle.readdir_response.pvfs_dirent_outcount; + i++) { len = rhandle.readdir_response.dirent_array[i].d_length; current_entry = rhandle.readdir_response.dirent_array[i].d_name; current_ino = pvfs2_khandle_to_ino( &(rhandle.readdir_response.dirent_array[i].khandle)); gossip_debug(GOSSIP_DIR_DEBUG, - "calling dir_emit for %s with len %d, pos %ld\n", + "calling dir_emit for %s with len %d" + ", ctx->pos %ld\n", current_entry, len, - (unsigned long)pos); + (unsigned long)ctx->pos); + /* + * type is unknown. We don't return object type + * in the dirent_array. This leaves getdents + * clueless about type. + */ ret = dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); + if (!ret) + break; ctx->pos++; - gossip_ldebug(GOSSIP_DIR_DEBUG, + gossip_debug(GOSSIP_DIR_DEBUG, "%s: ctx->pos:%lld\n", __func__, lld(ctx->pos)); - pos++; } - /* this means that all of the dir_emit calls succeeded */ - if (i == rhandle.readdir_response.pvfs_dirent_outcount) { - /* update token */ + /* + * we ran all the way through the last batch, set up for + * getting another batch... 
+ */ + if (ret) { *ptoken = rhandle.readdir_response.token; - } else { - /* this means a dir_emit call failed */ - if (rhandle.readdir_response.token == PVFS_READDIR_END) { - /* - * If PVFS hit end of directory, then there - * is no way to do math on the token that it - * returned. Instead we go by ctx->pos but - * back up to account for the artificial . - * and .. entries. - */ - ctx->pos -= 3; - } else { - /* - * this means a dir_emit call failed. !!! need to set - * back to previous ctx->pos, no middle value allowed - */ - pos -= (i - 1); - ctx->pos -= (i - 1); - } - gossip_debug(GOSSIP_DIR_DEBUG, - "at least one dir_emit call failed. Setting ctx->pos to: %lld\n", - lld(ctx->pos)); + ctx->pos = PVFS_ITERATE_NEXT; } /* @@ -330,17 +303,11 @@ get_new_buffer_index: */ if (rhandle.readdir_response.token == PVFS_READDIR_END && !buffer_full) { - gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); + gossip_debug(GOSSIP_DIR_DEBUG, + "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); ctx->pos = PVFS_READDIR_END; } - gossip_debug(GOSSIP_DIR_DEBUG, - "pos = %llu, token = %llu" - ", ctx->pos should have been %lld\n", - llu(pos), - llu(*ptoken), - lld(ctx->pos)); - out_destroy_handle: readdir_handle_dtor(bufmap, &rhandle); out_free_op: diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index f571be21f66a..cae9cc0f9d18 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -384,6 +384,7 @@ DECLARE_ERRNO_MAPPING() #define INT32_MAX (2147483647) #define PVFS_ITERATE_START (INT32_MAX - 1) #define PVFS_ITERATE_END (INT32_MAX - 2) +#define PVFS_ITERATE_NEXT (INT32_MAX - 3) #define PVFS_READDIR_START PVFS_ITERATE_START #define PVFS_READDIR_END PVFS_ITERATE_END #define PVFS_IMMUTABLE_FL FS_IMMUTABLE_FL From d6fe654b7b580720fee632c8d526c6a159111d50 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 24 Sep 2015 12:38:38 -0400 Subject: [PATCH 017/174] Orangefs: put PVFS_util_min out of its misery. 
Signed-off-by: Mike Marshall --- fs/orangefs/protocol.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index cae9cc0f9d18..c50c6e30f5a8 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -572,7 +572,6 @@ struct dev_mask2_info_s { }; /* pvfs2-util.h *************************************************************/ -#define PVFS_util_min(x1, x2) (((x1) > (x2)) ? (x2) : (x1)) __s32 PVFS_util_translate_mode(int mode); /* pvfs2-debug.h ************************************************************/ From f957ae2dec09b63b44df9ec06765cbdc52666eec Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 24 Sep 2015 12:53:05 -0400 Subject: [PATCH 018/174] Orangefs: choose return codes from among the expected ones. Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 2 +- fs/orangefs/namei.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 3e5fc1a2c82f..53e58c3f2121 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -973,7 +973,7 @@ static loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) */ static int pvfs2_lock(struct file *filp, int cmd, struct file_lock *fl) { - int rc = -ENOLCK; + int rc = -EINVAL; if (PVFS2_SB(filp->f_inode->i_sb)->flags & PVFS2_OPT_LOCAL_LOCK) { if (cmd == F_GETLK) { diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 747fe6a690af..05f6feadfd0d 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -243,28 +243,24 @@ static int pvfs2_unlink(struct inode *dir, struct dentry *dentry) } /* - * pvfs2_link() is only implemented here to make sure that we return a - * reasonable error code (the kernel will return a misleading EPERM - * otherwise). PVFS2 does not support hard links. + * PVFS2 does not support hard links. 
*/ static int pvfs2_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - return -EOPNOTSUPP; + return -EPERM; } /* - * pvfs2_mknod() is only implemented here to make sure that we return a - * reasonable error code (the kernel will return a misleading EPERM - * otherwise). PVFS2 does not support special files such as fifos or devices. + * PVFS2 does not support special files. */ static int pvfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - return -EOPNOTSUPP; + return -EPERM; } static int pvfs2_symlink(struct inode *dir, From 50e01586f4b10dc7aa534bbfcd1707586e7b32e0 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 29 Sep 2015 11:17:26 -0400 Subject: [PATCH 019/174] Orangefs: Don't opencode memcpy. Signed-off-by: Mike Marshall --- fs/orangefs/protocol.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index c50c6e30f5a8..cdbde64f8e39 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -70,30 +70,21 @@ static inline int PVFS_khandle_cmp(const struct pvfs2_khandle *kh1, return 0; } -/* copy a khandle to a field of arbitrary size */ static inline void PVFS_khandle_to(const struct pvfs2_khandle *kh, void *p, int size) { - int i; - unsigned char *c = p; memset(p, 0, size); + memcpy(p, kh->u, 16); - for (i = 0; i < 16 && i < size; i++) - c[i] = kh->u[i]; } -/* copy a khandle from a field of arbitrary size */ static inline void PVFS_khandle_from(struct pvfs2_khandle *kh, void *p, int size) { - int i; - unsigned char *c = p; - memset(kh, 0, 16); + memcpy(kh->u, p, 16); - for (i = 0; i < 16 && i < size; i++) - kh->u[i] = c[i]; } /* pvfs2-types.h ************************************************************/ From 8c3905adea92c79e32b02120c724dfd4cf84dd85 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 29 Sep 2015 12:07:46 -0400 Subject: [PATCH 020/174] Orangefs: update signal blocking code before Oleg sees it. 
Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-kernel.h | 4 ++-- fs/orangefs/pvfs2-utils.c | 37 +++++++++++++------------------------ fs/orangefs/waitqueue.c | 6 +++--- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index be30111b40d2..299b48c37cab 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -649,9 +649,9 @@ void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op); void pvfs2_make_bad_inode(struct inode *inode); -void mask_blocked_signals(sigset_t *orig_sigset); +void block_signals(sigset_t *); -void unmask_blocked_signals(sigset_t *orig_sigset); +void set_signals(sigset_t *); int pvfs2_unmount_sb(struct super_block *sb); diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 11ee073ecd19..834e06674d0a 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -632,36 +632,25 @@ void pvfs2_make_bad_inode(struct inode *inode) } } -/* this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigmask */ -void mask_blocked_signals(sigset_t *orig_sigset) +/* Block all blockable signals... */ +void block_signals(sigset_t *orig_sigset) { - unsigned long sigallow = sigmask(SIGKILL); - unsigned long irqflags = 0; - struct k_sigaction *action = pvfs2_current_sigaction; + sigset_t mask; - sigallow |= ((action[SIGINT - 1].sa.sa_handler == SIG_DFL) ? - sigmask(SIGINT) : - 0); - sigallow |= ((action[SIGQUIT - 1].sa.sa_handler == SIG_DFL) ? - sigmask(SIGQUIT) : - 0); + /* + * Initialize all entries in the signal set to the + * inverse of the given mask. + */ + siginitsetinv(&mask, sigmask(SIGKILL)); - spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags); - *orig_sigset = current->blocked; - siginitsetinv(¤t->blocked, sigallow & ~orig_sigset->sig[0]); - recalc_sigpending(); - spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags); + /* Block 'em Danno... 
*/ + sigprocmask(SIG_BLOCK, &mask, orig_sigset); } -/* this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigunmask */ -void unmask_blocked_signals(sigset_t *orig_sigset) +/* set the signal mask to the given template... */ +void set_signals(sigset_t *sigset) { - unsigned long irqflags = 0; - - spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags); - current->blocked = *orig_sigset; - recalc_sigpending(); - spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags); + sigprocmask(SIG_SETMASK, sigset, NULL); } __u64 pvfs2_convert_time_field(void *time_ptr) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index ad79e534dc8e..d7b0eba043ab 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -80,7 +80,7 @@ retry_servicing: /* mask out signals if this operation is not to be interrupted */ if (!(flags & PVFS2_OP_INTERRUPTIBLE)) - mask_blocked_signals(&orig_sigset); + block_signals(&orig_sigset); if (!(flags & PVFS2_OP_NO_SEMAPHORE)) { ret = mutex_lock_interruptible(&request_mutex); @@ -90,7 +90,7 @@ retry_servicing: */ if (ret < 0) { if (!(flags & PVFS2_OP_INTERRUPTIBLE)) - unmask_blocked_signals(&orig_sigset); + set_signals(&orig_sigset); op->downcall.status = ret; gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2: service_operation interrupted.\n"); @@ -160,7 +160,7 @@ retry_servicing: } if (!(flags & PVFS2_OP_INTERRUPTIBLE)) - unmask_blocked_signals(&orig_sigset); + set_signals(&orig_sigset); BUG_ON(ret != op->downcall.status); /* retry if operation has not been serviced and if requested */ From 1be21f865aa5a94b178bf22e749567001cf5ef9b Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 29 Sep 2015 15:26:37 -0400 Subject: [PATCH 021/174] Orangefs: don't use mount_nodev, use sget directly. 
Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 90c7a1c9f201..9dee95293599 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -422,7 +422,7 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, struct super_block *sb = ERR_PTR(-EINVAL); struct pvfs2_kernel_op_s *new_op; struct pvfs2_mount_sb_info_s mount_sb_info; - struct dentry *mnt_sb_d = ERR_PTR(-EINVAL); + struct dentry *d = ERR_PTR(-EINVAL); gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_mount: called with devname %s\n", @@ -464,23 +464,21 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id; mount_sb_info.id = new_op->downcall.resp.fs_mount.id; - /* - * the mount_sb_info structure looks odd, but it's used because - * the private sb info isn't allocated until we call - * pvfs2_fill_sb, yet we have the info we need to fill it with - * here. so we store it temporarily and pass all of the info - * to fill_sb where it's properly copied out - */ - mnt_sb_d = mount_nodev(fst, - flags, - (void *)&mount_sb_info, - pvfs2_fill_sb); - if (IS_ERR(mnt_sb_d)) { - sb = ERR_CAST(mnt_sb_d); + sb = sget(fst, NULL, set_anon_super, flags, NULL); + + if (IS_ERR(sb)) { + d = ERR_CAST(sb); goto free_op; } - sb = mnt_sb_d->d_sb; + ret = pvfs2_fill_sb(sb, + (void *)&mount_sb_info, + flags & MS_SILENT ? 
1 : 0); + + if (ret) { + d = ERR_PTR(ret); + goto free_op; + } /* * on successful mount, store the devname and data @@ -499,7 +497,7 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, */ add_pvfs2_sb(sb); op_release(new_op); - return mnt_sb_d; + return dget(sb->s_root); free_op: gossip_err("pvfs2_mount: mount request failed with %d\n", ret); @@ -510,10 +508,7 @@ free_op: op_release(new_op); - gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_mount: returning dentry %p\n", - mnt_sb_d); - return mnt_sb_d; + return d; } void pvfs2_kill_sb(struct super_block *sb) From 353908035f699bc6b769c4cd351c3125553d63c1 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 30 Sep 2015 13:11:54 -0400 Subject: [PATCH 022/174] Orangefs: Use readonly mmap since writepage is not implemented. Previously the code silently failed to update the disk. Now it will not allow writable and shared mmaps. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 53e58c3f2121..87f718163d1b 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -853,7 +853,9 @@ static int pvfs2_file_mmap(struct file *file, struct vm_area_struct *vma) /* set the sequential readahead hint */ vma->vm_flags |= VM_SEQ_READ; vma->vm_flags &= ~VM_RAND_READ; - return generic_file_mmap(file, vma); + + /* Use readonly mmap since we cannot support writable maps. */ + return generic_file_readonly_mmap(file, vma); } #define mapping_nrpages(idata) ((idata)->nrpages) From 894ac432b48bb64fabae31fd2f373b2b8659350f Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Fri, 2 Oct 2015 12:11:19 -0400 Subject: [PATCH 023/174] Orangefs: Clean up error decoding. Errors from the server need to be decoded. A bunch of code was imported from the server to do this but much of it is convoluted and not even needed. The result is better but still as convoluted as required by the protocol. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/protocol.h | 280 ++++---------------------------------- fs/orangefs/pvfs2-utils.c | 58 ++++++-- 2 files changed, 77 insertions(+), 261 deletions(-) diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index cdbde64f8e39..b374c4b2009e 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -100,262 +100,40 @@ typedef __s32 PVFS_error; typedef __s64 PVFS_offset; #define PVFS2_SUPER_MAGIC 0x20030528 -#define PVFS_ERROR_BIT (1 << 30) + +/* PVFS2 error codes are a signed 32-bit integer. Error codes are negative, but + * the sign is stripped before decoding. */ + +/* Bit 31 is not used since it is the sign. */ + +/* Bit 30 specifies that this is a PVFS2 error. A PVFS2 error is either an + * encoded errno value or a PVFS2 protocol error. */ +#define PVFS_ERROR_BIT (1 << 30) + +/* Bit 29 specifies that this is a PVFS2 protocol error and not an encoded + * errno value. */ #define PVFS_NON_ERRNO_ERROR_BIT (1 << 29) -#define IS_PVFS_ERROR(__error) ((__error)&(PVFS_ERROR_BIT)) -#define IS_PVFS_NON_ERRNO_ERROR(__error) \ -(((__error)&(PVFS_NON_ERRNO_ERROR_BIT)) && IS_PVFS_ERROR(__error)) -#define PVFS_ERROR_TO_ERRNO(__error) PVFS_get_errno_mapping(__error) -/* 7 bits are used for the errno mapped error codes */ -#define PVFS_ERROR_CODE(__error) \ -((__error) & (__s32)(0x7f|PVFS_ERROR_BIT)) -#define PVFS_ERROR_CLASS(__error) \ -((__error) & ~((__s32)(0x7f|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT))) -#define PVFS_NON_ERRNO_ERROR_CODE(__error) \ -((__error) & (__s32)(127|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT)) +/* Bits 9, 8, and 7 specify the error class, which encodes the section of + * server code the error originated in for logging purposes. It is not used + * in the kernel except to be masked out. 
*/ +#define PVFS_ERROR_CLASS_BITS 0x380 -/* PVFS2 error codes, compliments of asm/errno.h */ -#define PVFS_EPERM E(1) /* Operation not permitted */ -#define PVFS_ENOENT E(2) /* No such file or directory */ -#define PVFS_EINTR E(3) /* Interrupted system call */ -#define PVFS_EIO E(4) /* I/O error */ -#define PVFS_ENXIO E(5) /* No such device or address */ -#define PVFS_EBADF E(6) /* Bad file number */ -#define PVFS_EAGAIN E(7) /* Try again */ -#define PVFS_ENOMEM E(8) /* Out of memory */ -#define PVFS_EFAULT E(9) /* Bad address */ -#define PVFS_EBUSY E(10) /* Device or resource busy */ -#define PVFS_EEXIST E(11) /* File exists */ -#define PVFS_ENODEV E(12) /* No such device */ -#define PVFS_ENOTDIR E(13) /* Not a directory */ -#define PVFS_EISDIR E(14) /* Is a directory */ -#define PVFS_EINVAL E(15) /* Invalid argument */ -#define PVFS_EMFILE E(16) /* Too many open files */ -#define PVFS_EFBIG E(17) /* File too large */ -#define PVFS_ENOSPC E(18) /* No space left on device */ -#define PVFS_EROFS E(19) /* Read-only file system */ -#define PVFS_EMLINK E(20) /* Too many links */ -#define PVFS_EPIPE E(21) /* Broken pipe */ -#define PVFS_EDEADLK E(22) /* Resource deadlock would occur */ -#define PVFS_ENAMETOOLONG E(23) /* File name too long */ -#define PVFS_ENOLCK E(24) /* No record locks available */ -#define PVFS_ENOSYS E(25) /* Function not implemented */ -#define PVFS_ENOTEMPTY E(26) /* Directory not empty */ - /* -#define PVFS_ELOOP E(27) * Too many symbolic links encountered - */ -#define PVFS_EWOULDBLOCK E(28) /* Operation would block */ -#define PVFS_ENOMSG E(29) /* No message of desired type */ -#define PVFS_EUNATCH E(30) /* Protocol driver not attached */ -#define PVFS_EBADR E(31) /* Invalid request descriptor */ -#define PVFS_EDEADLOCK E(32) -#define PVFS_ENODATA E(33) /* No data available */ -#define PVFS_ETIME E(34) /* Timer expired */ -#define PVFS_ENONET E(35) /* Machine is not on the network */ -#define PVFS_EREMOTE E(36) /* Object is remote */ -#define 
PVFS_ECOMM E(37) /* Communication error on send */ -#define PVFS_EPROTO E(38) /* Protocol error */ -#define PVFS_EBADMSG E(39) /* Not a data message */ - /* -#define PVFS_EOVERFLOW E(40) * Value too large for defined data - * type - */ - /* -#define PVFS_ERESTART E(41) * Interrupted system call should be - * restarted - */ -#define PVFS_EMSGSIZE E(42) /* Message too long */ -#define PVFS_EPROTOTYPE E(43) /* Protocol wrong type for socket */ -#define PVFS_ENOPROTOOPT E(44) /* Protocol not available */ -#define PVFS_EPROTONOSUPPORT E(45) /* Protocol not supported */ - /* -#define PVFS_EOPNOTSUPP E(46) * Operation not supported on transport - * endpoint - */ -#define PVFS_EADDRINUSE E(47) /* Address already in use */ -#define PVFS_EADDRNOTAVAIL E(48) /* Cannot assign requested address */ -#define PVFS_ENETDOWN E(49) /* Network is down */ -#define PVFS_ENETUNREACH E(50) /* Network is unreachable */ - /* -#define PVFS_ENETRESET E(51) * Network dropped connection because - * of reset - */ -#define PVFS_ENOBUFS E(52) /* No buffer space available */ -#define PVFS_ETIMEDOUT E(53) /* Connection timed out */ -#define PVFS_ECONNREFUSED E(54) /* Connection refused */ -#define PVFS_EHOSTDOWN E(55) /* Host is down */ -#define PVFS_EHOSTUNREACH E(56) /* No route to host */ -#define PVFS_EALREADY E(57) /* Operation already in progress */ -#define PVFS_EACCES E(58) /* Access not allowed */ -#define PVFS_ECONNRESET E(59) /* Connection reset by peer */ -#define PVFS_ERANGE E(60) /* Math out of range or buf too small */ +/* Bits 6 - 0 are reserved for the actual error code. 
*/ +#define PVFS_ERROR_NUMBER_BITS 0x7f -/***************** non-errno/pvfs2 specific error codes *****************/ -#define PVFS_ECANCEL (1|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_EDEVINIT (2|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_EDETAIL (3|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_EHOSTNTFD (4|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_EADDRNTFD (5|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_ENORECVR (6|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_ETRYAGAIN (7|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_ENOTPVFS (8|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) -#define PVFS_ESECURITY (9|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +/* Encoded errno values are decoded by PINT_errno_mapping in pvfs2-utils.c. */ -/* - * NOTE: PLEASE DO NOT ARBITRARILY ADD NEW ERRNO ERROR CODES! - * - * IF YOU CHOOSE TO ADD A NEW ERROR CODE (DESPITE OUR PLEA), YOU ALSO - * NEED TO INCREMENT PVFS_ERRNO MAX (BELOW) AND ADD A MAPPING TO A - * UNIX ERRNO VALUE IN THE MACROS BELOW (USED IN - * src/common/misc/errno-mapping.c and the kernel module) - */ -#define PVFS_ERRNO_MAX 61 - -#define PVFS_ERROR_BMI (1 << 7) /* BMI-specific error */ -#define PVFS_ERROR_TROVE (2 << 7) /* Trove-specific error */ -#define PVFS_ERROR_FLOW (3 << 7) -#define PVFS_ERROR_SM (4 << 7) /* state machine specific error */ -#define PVFS_ERROR_SCHED (5 << 7) -#define PVFS_ERROR_CLIENT (6 << 7) -#define PVFS_ERROR_DEV (7 << 7) /* device file interaction */ - -#define PVFS_ERROR_CLASS_BITS \ - (PVFS_ERROR_BMI | \ - PVFS_ERROR_TROVE | \ - PVFS_ERROR_FLOW | \ - PVFS_ERROR_SM | \ - PVFS_ERROR_SCHED | \ - PVFS_ERROR_CLIENT | \ - PVFS_ERROR_DEV) - -#define DECLARE_ERRNO_MAPPING() \ -__s32 PINT_errno_mapping[PVFS_ERRNO_MAX + 1] = { \ - 0, /* leave this one empty */ \ - EPERM, /* 1 */ \ - ENOENT, \ - EINTR, \ - EIO, \ - ENXIO, \ - EBADF, \ - EAGAIN, \ - ENOMEM, \ - EFAULT, \ - EBUSY, /* 10 */ \ - EEXIST, \ - ENODEV, \ - 
ENOTDIR, \ - EISDIR, \ - EINVAL, \ - EMFILE, \ - EFBIG, \ - ENOSPC, \ - EROFS, \ - EMLINK, /* 20 */ \ - EPIPE, \ - EDEADLK, \ - ENAMETOOLONG, \ - ENOLCK, \ - ENOSYS, \ - ENOTEMPTY, \ - ELOOP, \ - EWOULDBLOCK, \ - ENOMSG, \ - EUNATCH, /* 30 */ \ - EBADR, \ - EDEADLOCK, \ - ENODATA, \ - ETIME, \ - ENONET, \ - EREMOTE, \ - ECOMM, \ - EPROTO, \ - EBADMSG, \ - EOVERFLOW, /* 40 */ \ - ERESTART, \ - EMSGSIZE, \ - EPROTOTYPE, \ - ENOPROTOOPT, \ - EPROTONOSUPPORT, \ - EOPNOTSUPP, \ - EADDRINUSE, \ - EADDRNOTAVAIL, \ - ENETDOWN, \ - ENETUNREACH, /* 50 */ \ - ENETRESET, \ - ENOBUFS, \ - ETIMEDOUT, \ - ECONNREFUSED, \ - EHOSTDOWN, \ - EHOSTUNREACH, \ - EALREADY, \ - EACCES, \ - ECONNRESET, /* 59 */ \ - ERANGE, \ - 0 /* PVFS_ERRNO_MAX */ \ -}; \ -const char *PINT_non_errno_strerror_mapping[] = { \ - "Success", /* 0 */ \ - "Operation cancelled (possibly due to timeout)", \ - "Device initialization failed", \ - "Detailed per-server errors are available", \ - "Unknown host", \ - "No address associated with name", \ - "Unknown server error", \ - "Host name lookup failure", \ - "Path contains non-PVFS elements", \ - "Security error", \ -}; \ -__s32 PINT_non_errno_mapping[] = { \ - 0, /* leave this one empty */ \ - PVFS_ECANCEL, /* 1 */ \ - PVFS_EDEVINIT, /* 2 */ \ - PVFS_EDETAIL, /* 3 */ \ - PVFS_EHOSTNTFD, /* 4 */ \ - PVFS_EADDRNTFD, /* 5 */ \ - PVFS_ENORECVR, /* 6 */ \ - PVFS_ETRYAGAIN, /* 7 */ \ - PVFS_ENOTPVFS, /* 8 */ \ - PVFS_ESECURITY, /* 9 */ \ -} - -/* - * NOTE: PVFS_get_errno_mapping will convert a PVFS_ERROR_CODE to an - * errno value. If the error code is a pvfs2 specific error code - * (i.e. a PVFS_NON_ERRNO_ERROR_CODE), PVFS_get_errno_mapping will - * return an index into the PINT_non_errno_strerror_mapping array which - * can be used for getting the pvfs2 specific strerror message given - * the error code. if the value is not a recognized error code, the - * passed in value will be returned unchanged. 
- */ -#define DECLARE_ERRNO_MAPPING_AND_FN() \ -extern __s32 PINT_errno_mapping[]; \ -extern __s32 PINT_non_errno_mapping[]; \ -extern const char *PINT_non_errno_strerror_mapping[]; \ -static __s32 PVFS_get_errno_mapping(__s32 error) \ -{ \ - __s32 ret = error, mask = 0; \ - __s32 positive = ((error > -1) ? 1 : 0); \ - if (IS_PVFS_NON_ERRNO_ERROR((positive ? error : -error))) { \ - mask = (PVFS_NON_ERRNO_ERROR_BIT | \ - PVFS_ERROR_BIT | \ - PVFS_ERROR_CLASS_BITS); \ - ret = PVFS_NON_ERRNO_ERROR_CODE(((positive ? \ - error : \ - abs(error))) & \ - ~mask); \ - } \ - else if (IS_PVFS_ERROR((positive ? error : -error))) { \ - mask = (PVFS_ERROR_BIT | \ - PVFS_ERROR_CLASS_BITS); \ - ret = PINT_errno_mapping[PVFS_ERROR_CODE(((positive ? \ - error : \ - abs(error))) & \ - ~mask)]; \ - } \ - return ret; \ -} \ -DECLARE_ERRNO_MAPPING() +/* Our own PVFS2 protocol error codes. */ +#define PVFS_ECANCEL (1|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_EDEVINIT (2|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_EDETAIL (3|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_EHOSTNTFD (4|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_EADDRNTFD (5|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_ENORECVR (6|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_ETRYAGAIN (7|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_ENOTPVFS (8|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +#define PVFS_ESECURITY (9|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) /* permission bits */ #define PVFS_O_EXECUTE (1 << 0) diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 834e06674d0a..086ebbb36570 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -662,20 +662,45 @@ __u64 pvfs2_convert_time_field(void *time_ptr) return pvfs2_time; } -/* macro defined in include/pvfs2-types.h */ -DECLARE_ERRNO_MAPPING_AND_FN(); +/* The following is a very dirty hack that is now a permanent part of the + * PVFS2 protocol. 
See protocol.h for more error definitions. */ + +/* The order matches include/pvfs2-types.h in the OrangeFS source. */ +static int PINT_errno_mapping[] = { + 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, + EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, + EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, + ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH, + EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM, + EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, + ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, + EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS, + ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY, + EACCES, ECONNRESET, ERANGE +}; int pvfs2_normalize_to_errno(__s32 error_code) { - if (error_code > 0) { + /* Success */ + if (error_code == 0) { + return 0; + /* This shouldn't ever happen. If it does it should be fixed on the + * server. */ + } else if (error_code > 0) { gossip_err("pvfs2: error status receieved.\n"); gossip_err("pvfs2: assuming error code is inverted.\n"); error_code = -error_code; } - /* convert any error codes that are in pvfs2 format */ - if (IS_PVFS_NON_ERRNO_ERROR(-error_code)) { - if (PVFS_NON_ERRNO_ERROR_CODE(-error_code) == PVFS_ECANCEL) { + /* XXX: This is very bad since error codes from PVFS2 may not be + * suitable for return into userspace. */ + + /* Convert PVFS2 error values into errno values suitable for return + * from the kernel. 
*/ + if ((-error_code) & PVFS_NON_ERRNO_ERROR_BIT) { + if (((-error_code) & + (PVFS_ERROR_NUMBER_BITS|PVFS_NON_ERRNO_ERROR_BIT| + PVFS_ERROR_BIT)) == PVFS_ECANCEL) { /* * cancellation error codes generally correspond to * a timeout from the client's perspective @@ -683,12 +708,25 @@ int pvfs2_normalize_to_errno(__s32 error_code) error_code = -ETIMEDOUT; } else { /* assume a default error code */ - gossip_err("pvfs2: warning: got error code without errno equivalent: %d.\n", - error_code); + gossip_err("pvfs2: warning: got error code without " + "errno equivalent: %d.\n", error_code); error_code = -EINVAL; } - } else if (IS_PVFS_ERROR(-error_code)) { - error_code = -PVFS_ERROR_TO_ERRNO(-error_code); + + /* Convert PVFS2 encoded errno values into regular errno values. */ + } else if ((-error_code) & PVFS_ERROR_BIT) { + __u32 i; + i = (-error_code) & ~(PVFS_ERROR_BIT|PVFS_ERROR_CLASS_BITS); + if (i < sizeof PINT_errno_mapping/sizeof *PINT_errno_mapping) + error_code = -PINT_errno_mapping[i]; + else + error_code = -EINVAL; + + /* Only PVFS2 protocol error codes should ever come here. Otherwise + * there is a bug somewhere. */ + } else { + gossip_err("pvfs2: pvfs2_normalize_to_errno: got error code" + "which is not from PVFS2.\n"); } return error_code; } From 548049495cb46348866aec1cb7721e9d00b4eb83 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 5 Oct 2015 13:44:24 -0400 Subject: [PATCH 024/174] Orangefs: fix some checkpatch.pl complaints that had crept in.
Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 6 +-- fs/orangefs/dir.c | 2 +- fs/orangefs/downcall.h | 2 +- fs/orangefs/file.c | 25 ++++++----- fs/orangefs/inode.c | 24 +++++------ fs/orangefs/protocol.h | 32 +++++++------- fs/orangefs/pvfs2-bufmap.c | 10 ++--- fs/orangefs/pvfs2-debug.h | 6 ++- fs/orangefs/pvfs2-kernel.h | 9 ++-- fs/orangefs/pvfs2-mod.c | 10 ++--- fs/orangefs/pvfs2-utils.c | 87 +++++++++++++++++++++----------------- 11 files changed, 111 insertions(+), 102 deletions(-) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index 13878cac49ed..ede842f05b62 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -476,9 +476,9 @@ static ssize_t pvfs2_devreq_writev(struct file *file, set_op_state_serviced(op); spin_unlock(&op->lock); /* - for every other operation (i.e. non-I/O), we need to - wake up the callers for downcall completion - notification + * for every other operation (i.e. non-I/O), we need to + * wake up the callers for downcall completion + * notification */ wake_up_interruptible(&op->waitq); } diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 3870e78f5ecf..daf497384501 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -289,7 +289,7 @@ get_new_buffer_index: } - /* + /* * we ran all the way through the last batch, set up for * getting another batch... */ diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h index a79129f875f3..f8bea46e7c6a 100644 --- a/fs/orangefs/downcall.h +++ b/fs/orangefs/downcall.h @@ -107,7 +107,7 @@ struct pvfs2_downcall_s { __s32 status; /* currently trailer is used only by readdir */ __s64 trailer_size; - char * trailer_buf; + char *trailer_buf; union { struct pvfs2_io_response io; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 87f718163d1b..feb1764c2f80 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -54,7 +54,6 @@ static int precopy_buffers(struct pvfs2_bufmap *bufmap, gossip_err("%s: Failed to copy-in buffers. 
Please make sure that the pvfs2-client is running. %ld\n", __func__, (long)ret); - } if (ret < 0) @@ -199,9 +198,9 @@ populate_shared_memory: if (ret < 0) { handle_io_error(); /* defined in pvfs2-kernel.h */ /* - don't write an error to syslog on signaled operation - termination unless we've got debugging turned on, as - this can happen regularly (i.e. ctrl-c) + * don't write an error to syslog on signaled operation + * termination unless we've got debugging turned on, as + * this can happen regularly (i.e. ctrl-c) */ if (ret == -EINTR) gossip_debug(GOSSIP_FILE_DEBUG, @@ -245,10 +244,10 @@ populate_shared_memory: ret = new_op->downcall.resp.io.amt_complete; /* - tell the device file owner waiting on I/O that this read has - completed and it can return now. in this exact case, on - wakeup the daemon will free the op, so we *cannot* touch it - after this. + * tell the device file owner waiting on I/O that this read has + * completed and it can return now. in this exact case, on + * wakeup the daemon will free the op, so we *cannot* touch it + * after this. 
*/ wake_up_daemon_for_return(new_op); new_op = NULL; @@ -875,9 +874,9 @@ static int pvfs2_file_release(struct inode *inode, struct file *file) pvfs2_flush_inode(inode); /* - remove all associated inode pages from the page cache and mmap - readahead cache (if any); this forces an expensive refresh of - data for the next caller of mmap (or 'get_block' accesses) + * remove all associated inode pages from the page cache and mmap + * readahead cache (if any); this forces an expensive refresh of + * data for the next caller of mmap (or 'get_block' accesses) */ if (file->f_path.dentry->d_inode && file->f_path.dentry->d_inode->i_mapping && @@ -960,8 +959,8 @@ static loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) } gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_file_llseek: offset is %ld | origin is %d | " - "inode size is %lu\n", + "pvfs2_file_llseek: offset is %ld | origin is %d" + " | inode size is %lu\n", (long)offset, origin, (unsigned long)file->f_path.dentry->d_inode->i_size); diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 9ff6b2985240..4f7c45a44c1f 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -128,18 +128,18 @@ static int pvfs2_releasepage(struct page *page, gfp_t foo) * AIO. Modeled after NFS, they do this too. 
*/ /* -static ssize_t pvfs2_direct_IO(int rw, - struct kiocb *iocb, - struct iov_iter *iter, - loff_t offset) -{ - gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_direct_IO: %s\n", - iocb->ki_filp->f_path.dentry->d_name.name); - - return -EINVAL; -} -*/ + * static ssize_t pvfs2_direct_IO(int rw, + * struct kiocb *iocb, + * struct iov_iter *iter, + * loff_t offset) + *{ + * gossip_debug(GOSSIP_INODE_DEBUG, + * "pvfs2_direct_IO: %s\n", + * iocb->ki_filp->f_path.dentry->d_name.name); + * + * return -EINVAL; + *} + */ struct backing_dev_info pvfs2_backing_dev_info = { .name = "pvfs2", diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index b374c4b2009e..85f611fe0536 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -101,22 +101,30 @@ typedef __s64 PVFS_offset; #define PVFS2_SUPER_MAGIC 0x20030528 -/* PVFS2 error codes are a signed 32-bit integer. Error codes are negative, but - * the sign is stripped before decoding. */ +/* + * PVFS2 error codes are a signed 32-bit integer. Error codes are negative, but + * the sign is stripped before decoding. + */ /* Bit 31 is not used since it is the sign. */ -/* Bit 30 specifies that this is a PVFS2 error. A PVFS2 error is either an - * encoded errno value or a PVFS2 protocol error. */ +/* + * Bit 30 specifies that this is a PVFS2 error. A PVFS2 error is either an + * encoded errno value or a PVFS2 protocol error. + */ #define PVFS_ERROR_BIT (1 << 30) -/* Bit 29 specifies that this is a PVFS2 protocol error and not an encoded - * errno value. */ +/* + * Bit 29 specifies that this is a PVFS2 protocol error and not an encoded + * errno value. + */ #define PVFS_NON_ERRNO_ERROR_BIT (1 << 29) -/* Bits 9, 8, and 7 specify the error class, which encodes the section of +/* + * Bits 9, 8, and 7 specify the error class, which encodes the section of * server code the error originated in for logging purposes. It is not used - * in the kernel except to be masked out. */ + * in the kernel except to be masked out. 
+ */ #define PVFS_ERROR_CLASS_BITS 0x380 /* Bits 6 - 0 are reserved for the actual error code. */ @@ -388,14 +396,8 @@ enum { /* * version number for use in communicating between kernel space and user - * space + * space. Zero signifies the upstream version of the kernel module. */ -/* -#define PVFS_KERNEL_PROTO_VERSION \ - ((PVFS2_VERSION_MAJOR * 10000) + \ - (PVFS2_VERSION_MINOR * 100) + \ - PVFS2_VERSION_SUB) -*/ #define PVFS_KERNEL_PROTO_VERSION 0 /* diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index e01e220fd5d7..9d0392a3e824 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -508,9 +508,9 @@ void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index) } int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, - struct iov_iter *iter, - int buffer_index, - size_t size) + struct iov_iter *iter, + int buffer_index, + size_t size) { struct pvfs_bufmap_desc *to; struct page *page; @@ -553,7 +553,7 @@ int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, "%s: buffer_index:%d: iov_iter_count(iter):%lu:\n", __func__, buffer_index, iov_iter_count(iter)); - from = &bufmap->desc_array[buffer_index]; + from = &bufmap->desc_array[buffer_index]; for (i = 0; iov_iter_count(iter); i++) { page = from->page_array[i]; @@ -562,5 +562,5 @@ int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, break; } - return iov_iter_count(iter) ? -EFAULT : 0; + return iov_iter_count(iter) ? 
-EFAULT : 0; } diff --git a/fs/orangefs/pvfs2-debug.h b/fs/orangefs/pvfs2-debug.h index 4c27ad77fa16..fd71d6c84cf6 100644 --- a/fs/orangefs/pvfs2-debug.h +++ b/fs/orangefs/pvfs2-debug.h @@ -180,8 +180,10 @@ static struct __keyword_mask_s s_keyword_mask_map[] = { {"readdir", GOSSIP_READDIR_DEBUG}, /* Debug the mkdir operation (server only) */ {"mkdir", GOSSIP_MKDIR_DEBUG}, - /* Debug the io operation (reads and writes) - * for both the client and server */ + /* + * Debug the io operation (reads and writes) + * for both the client and server. + */ {"io", GOSSIP_IO_DEBUG}, /* Debug the server's open file descriptor cache */ {"open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG}, diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 299b48c37cab..29b4a48b3a25 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -229,9 +229,6 @@ int keyword_is_amalgam(char *); extern char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; extern char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; extern char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; -/* HELLO -extern struct client_debug_mask current_client_mask; -*/ extern unsigned int kernel_mask_set_mod_init; extern int pvfs2_init_acl(struct inode *inode, struct inode *dir); @@ -431,9 +428,9 @@ struct pvfs2_stats { extern struct pvfs2_stats g_pvfs2_stats; /* - NOTE: See Documentation/filesystems/porting for information - on implementing FOO_I and properly accessing fs private data -*/ + * NOTE: See Documentation/filesystems/porting for information + * on implementing FOO_I and properly accessing fs private data + */ static inline struct pvfs2_inode_s *PVFS2_I(struct inode *inode) { return container_of(inode, struct pvfs2_inode_s, vfs_inode); diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/pvfs2-mod.c index d80537dadcd8..d848c90413d1 100644 --- a/fs/orangefs/pvfs2-mod.c +++ b/fs/orangefs/pvfs2-mod.c @@ -73,11 +73,11 @@ module_param(slot_timeout_secs, int, 0); struct mutex devreq_mutex; 
/* - blocks non-priority requests from being queued for servicing. this - could be used for protecting the request list data structure, but - for now it's only being used to stall the op addition to the request - list -*/ + * Blocks non-priority requests from being queued for servicing. This + * could be used for protecting the request list data structure, but + * for now it's only being used to stall the op addition to the request + * list + */ struct mutex request_mutex; /* hash table for storing operations waiting for matching downcall */ diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 086ebbb36570..c33e7193599c 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -111,18 +111,18 @@ static int copy_attributes_to_inode(struct inode *inode, /* - arbitrarily set the inode block size; FIXME: we need to - resolve the difference between the reported inode blocksize - and the PAGE_CACHE_SIZE, since our block count will always - be wrong. - - For now, we're setting the block count to be the proper - number assuming the block size is 512 bytes, and the size is - rounded up to the nearest 4K. This is apparently required - to get proper size reports from the 'du' shell utility. - - changing the inode->i_blkbits to something other than - PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that. + * arbitrarily set the inode block size; FIXME: we need to + * resolve the difference between the reported inode blocksize + * and the PAGE_CACHE_SIZE, since our block count will always + * be wrong. + * + * For now, we're setting the block count to be the proper + * number assuming the block size is 512 bytes, and the size is + * rounded up to the nearest 4K. This is apparently required + * to get proper size reports from the 'du' shell utility. + * + * changing the inode->i_blkbits to something other than + * PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that. 
*/ gossip_debug(GOSSIP_UTILS_DEBUG, "attrs->mask = %x (objtype = %s)\n", @@ -662,41 +662,51 @@ __u64 pvfs2_convert_time_field(void *time_ptr) return pvfs2_time; } -/* The following is a very dirty hack that is now a permanent part of the - * PVFS2 protocol. See protocol.h for more error definitions. */ +/* + * The following is a very dirty hack that is now a permanent part of the + * PVFS2 protocol. See protocol.h for more error definitions. + */ /* The order matches include/pvfs2-types.h in the OrangeFS source. */ static int PINT_errno_mapping[] = { - 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, - EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, - EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, - ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH, - EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM, - EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, - ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, - EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS, - ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY, - EACCES, ECONNRESET, ERANGE + 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, + EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, + EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, + ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH, + EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM, + EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, + ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, + EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS, + ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY, + EACCES, ECONNRESET, ERANGE }; int pvfs2_normalize_to_errno(__s32 error_code) { + __u32 i; + /* Success */ if (error_code == 0) { return 0; - /* This shouldn't ever happen. If it does it should be fixed on the - * server. */ + /* + * This shouldn't ever happen. 
If it does it should be fixed on the + * server. + */ } else if (error_code > 0) { gossip_err("pvfs2: error status receieved.\n"); gossip_err("pvfs2: assuming error code is inverted.\n"); error_code = -error_code; } - /* XXX: This is very bad since error codes from PVFS2 may not be - * suitable for return into userspace. */ + /* + * XXX: This is very bad since error codes from PVFS2 may not be + * suitable for return into userspace. + */ - /* Convert PVFS2 error values into errno values suitable for return - * from the kernel. */ + /* + * Convert PVFS2 error values into errno values suitable for return + * from the kernel. + */ if ((-error_code) & PVFS_NON_ERRNO_ERROR_BIT) { if (((-error_code) & (PVFS_ERROR_NUMBER_BITS|PVFS_NON_ERRNO_ERROR_BIT| @@ -708,25 +718,24 @@ int pvfs2_normalize_to_errno(__s32 error_code) error_code = -ETIMEDOUT; } else { /* assume a default error code */ - gossip_err("pvfs2: warning: got error code without " - "errno equivalent: %d.\n", error_code); + gossip_err("pvfs2: warning: got error code without errno equivalent: %d.\n", error_code); error_code = -EINVAL; } /* Convert PVFS2 encoded errno values into regular errno values. */ } else if ((-error_code) & PVFS_ERROR_BIT) { - __u32 i; i = (-error_code) & ~(PVFS_ERROR_BIT|PVFS_ERROR_CLASS_BITS); - if (i < sizeof PINT_errno_mapping/sizeof *PINT_errno_mapping) + if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping)) error_code = -PINT_errno_mapping[i]; else error_code = -EINVAL; - /* Only PVFS2 protocol error codes should ever come here. Otherwise - * there is a bug somewhere. */ + /* + * Only PVFS2 protocol error codes should ever come here. Otherwise + * there is a bug somewhere. 
+ */ } else { - gossip_err("pvfs2: pvfs2_normalize_to_errno: got error code" - "which is not from PVFS2.\n"); + gossip_err("pvfs2: pvfs2_normalize_to_errno: got error code which is not from PVFS2.\n"); } return error_code; } @@ -993,7 +1002,7 @@ void do_k_string(void *k_mask, int index) __u64 *mask = (__u64 *) k_mask; if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword)) - goto out; + goto out; if (*mask & s_kmod_keyword_mask_map[index].mask_val) { if ((strlen(kernel_debug_string) + From 5c278228bbfe3abb7d468ef39dffac23de15c078 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 17:43:58 -0400 Subject: [PATCH 025/174] orangefs: explicitly pass the size to pvfs_bufmap_copy_to_iovec() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 3 ++- fs/orangefs/pvfs2-bufmap.c | 29 +++++++++++++++-------------- fs/orangefs/pvfs2-bufmap.h | 3 ++- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index feb1764c2f80..92a0974f0743 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -89,7 +89,8 @@ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, iov_iter_init(&iter, READ, vec, nr_segs, total_size); ret = pvfs_bufmap_copy_to_iovec(bufmap, &iter, - buffer_index); + buffer_index, + total_size); if (ret < 0) gossip_err("%s: Failed to copy-out buffers. 
Please make sure that the pvfs2-client is running (%ld)\n", __func__, diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index 9d0392a3e824..843883035214 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -542,25 +542,26 @@ int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, */ int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, struct iov_iter *iter, - int buffer_index) + int buffer_index, + size_t size) { - struct pvfs_bufmap_desc *from; - struct page *page; + struct pvfs_bufmap_desc *from = &bufmap->desc_array[buffer_index]; int i; - size_t written; gossip_debug(GOSSIP_BUFMAP_DEBUG, - "%s: buffer_index:%d: iov_iter_count(iter):%lu:\n", - __func__, buffer_index, iov_iter_count(iter)); + "%s: buffer_index:%d: size:%zu:\n", + __func__, buffer_index, size); - from = &bufmap->desc_array[buffer_index]; - for (i = 0; iov_iter_count(iter); i++) { - page = from->page_array[i]; - written = copy_page_to_iter(page, 0, PAGE_SIZE, iter); - if ((written == 0) && (iov_iter_count(iter))) - break; + for (i = 0; size; i++) { + struct page *page = from->page_array[i]; + size_t n = size; + if (n > PAGE_SIZE) + n = PAGE_SIZE; + n = copy_page_to_iter(page, 0, n, iter); + if (!n) + return -EFAULT; + size -= n; } - - return iov_iter_count(iter) ? 
-EFAULT : 0; + return 0; } diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/pvfs2-bufmap.h index a0f84c045d73..d1aedb52a877 100644 --- a/fs/orangefs/pvfs2-bufmap.h +++ b/fs/orangefs/pvfs2-bufmap.h @@ -49,7 +49,8 @@ int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, struct iov_iter *iter, - int buffer_index); + int buffer_index, + size_t size); size_t pvfs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, struct iovec *iovec, From 34204fde4c877cb33d8ec0df09f38333f570cc84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 17:47:44 -0400 Subject: [PATCH 026/174] pvfs_bufmap_copy_from_iovec(): don't rely upon size being equal to iov_iter_count(iter) Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-bufmap.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index 843883035214..dacf42bee196 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -512,26 +512,25 @@ int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, int buffer_index, size_t size) { - struct pvfs_bufmap_desc *to; - struct page *page; - size_t copied; + struct pvfs_bufmap_desc *to = &bufmap->desc_array[buffer_index]; int i; gossip_debug(GOSSIP_BUFMAP_DEBUG, - "%s: buffer_index:%d: size:%lu:\n", + "%s: buffer_index:%d: size:%zu:\n", __func__, buffer_index, size); - to = &bufmap->desc_array[buffer_index]; for (i = 0; size; i++) { - page = to->page_array[i]; - copied = copy_page_from_iter(page, 0, PAGE_SIZE, iter); - size -= copied; - if ((copied == 0) && (size)) - break; + struct page *page = to->page_array[i]; + size_t n = size; + if (n > PAGE_SIZE) + n = PAGE_SIZE; + n = copy_page_from_iter(page, 0, n, iter); + if (!n) + return -EFAULT; + size -= n; } - - return size ? 
-EFAULT : 0; + return 0; } From 5f0e3c953fd962d82e1f38aeb24f7aec9bd1ba54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 17:52:44 -0400 Subject: [PATCH 027/174] orangefs: make postcopy_buffers() take iov_iter Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 92a0974f0743..c169bdda66a3 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -72,23 +72,18 @@ static int precopy_buffers(struct pvfs2_bufmap *bufmap, */ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, int buffer_index, - const struct iovec *vec, - int nr_segs, + struct iov_iter *iter, size_t total_size) { int ret = 0; - - struct iov_iter iter; - /* * copy data to application/kernel by pushing it out to * the iovec. NOTE; target buffers can be addresses or * struct page pointers. */ if (total_size) { - iov_iter_init(&iter, READ, vec, nr_segs, total_size); ret = pvfs_bufmap_copy_to_iovec(bufmap, - &iter, + iter, buffer_index, total_size); if (ret < 0) @@ -221,10 +216,11 @@ populate_shared_memory: * postcopy_buffers only pertains to reads. 
*/ if (type == PVFS_IO_READ) { + struct iov_iter iter; + iov_iter_init(&iter, READ, vec, nr_segs, new_op->downcall.resp.io.amt_complete); ret = postcopy_buffers(bufmap, buffer_index, - vec, - nr_segs, + &iter, new_op->downcall.resp.io.amt_complete); if (ret < 0) { /* From a5c126a52269ce304b6da95e980e595668bf467d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 17:54:31 -0400 Subject: [PATCH 028/174] orangefs: make precopy_buffers() take iov_iter Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index c169bdda66a3..bd8e6f866047 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -31,13 +31,10 @@ do { \ */ static int precopy_buffers(struct pvfs2_bufmap *bufmap, int buffer_index, - const struct iovec *vec, - unsigned long nr_segs, + struct iov_iter *iter, size_t total_size) { int ret = 0; - struct iov_iter iter; - /* * copy data from application/kernel by pulling it out * of the iovec. @@ -45,9 +42,8 @@ static int precopy_buffers(struct pvfs2_bufmap *bufmap, if (total_size) { - iov_iter_init(&iter, WRITE, vec, nr_segs, total_size); ret = pvfs_bufmap_copy_from_iovec(bufmap, - &iter, + iter, buffer_index, total_size); if (ret < 0) @@ -152,10 +148,11 @@ populate_shared_memory: * precopy_buffers only pertains to writes. */ if (type == PVFS_IO_WRITE) { + struct iov_iter iter; + iov_iter_init(&iter, WRITE, vec, nr_segs, total_size); ret = precopy_buffers(bufmap, buffer_index, - vec, - nr_segs, + &iter, total_size); if (ret < 0) goto out; From 3c2fcfcb6858585e9df6c7832464ab28bfb5bb6b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 18:00:26 -0400 Subject: [PATCH 029/174] orangefs: make wait_for_direct_io() take iov_iter incidentally, insane or compromised server returning *more* than requested on read should not oops the kernel - initialize the iov_iter for read according to the iovec we've got. 
That's why pvfs_bufmap_copy_to_iovec() needed a separate size argument - we shouldn't abuse iov_iter_count(iter) for passing that. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index bd8e6f866047..9a439b2e8bde 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -94,7 +94,7 @@ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, * Post and wait for the I/O upcall to finish */ static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode, - loff_t *offset, struct iovec *vec, unsigned long nr_segs, + loff_t *offset, struct iov_iter *iter, size_t total_size, loff_t readahead_size) { struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); @@ -137,10 +137,9 @@ populate_shared_memory: new_op->upcall.req.io.offset = *offset; gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU): nr_segs %lu, offset: %llu total_size: %zd\n", + "%s(%pU): offset: %llu total_size: %zd\n", __func__, handle, - nr_segs, llu(*offset), total_size); /* @@ -148,11 +147,9 @@ populate_shared_memory: * precopy_buffers only pertains to writes. */ if (type == PVFS_IO_WRITE) { - struct iov_iter iter; - iov_iter_init(&iter, WRITE, vec, nr_segs, total_size); ret = precopy_buffers(bufmap, buffer_index, - &iter, + iter, total_size); if (ret < 0) goto out; @@ -213,11 +210,9 @@ populate_shared_memory: * postcopy_buffers only pertains to reads. 
*/ if (type == PVFS_IO_READ) { - struct iov_iter iter; - iov_iter_init(&iter, READ, vec, nr_segs, new_op->downcall.resp.io.amt_complete); ret = postcopy_buffers(bufmap, buffer_index, - &iter, + iter, new_op->downcall.resp.io.amt_complete); if (ret < 0) { /* @@ -563,6 +558,7 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, #endif seg = 0; while (total_count < count) { + struct iov_iter iter; size_t each_count; size_t amt_complete; @@ -583,8 +579,11 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)*offset); - ret = wait_for_direct_io(type, inode, offset, ptr, - seg_array[seg], each_count, 0); + iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, + ptr, seg_array[seg], each_count); + + ret = wait_for_direct_io(type, inode, offset, &iter, + each_count, 0); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): return from wait_for_io:%d\n", __func__, @@ -654,6 +653,7 @@ ssize_t pvfs2_inode_read(struct inode *inode, struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); size_t bufmap_size; struct iovec vec; + struct iov_iter iter; ssize_t ret = -EINVAL; g_pvfs2_stats.reads++; @@ -676,7 +676,8 @@ ssize_t pvfs2_inode_read(struct inode *inode, count, llu(*offset)); - ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, &vec, 1, + iov_iter_init(&iter, READ, &vec, 1, count); + ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, &iter, count, readahead_size); if (ret > 0) *offset += ret; From dc4067f671231eea971298cb44f687a30e04d0fd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 18:17:26 -0400 Subject: [PATCH 030/174] orangefs: don't bother with splitting iovecs copy_page_{to,from}_iter() advances it just fine *and* it has no problem with partially consumed segments. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 282 +-------------------------------------------- 1 file changed, 6 insertions(+), 276 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 9a439b2e8bde..ff7fe37f5a22 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -256,168 +256,6 @@ out: return ret; } -/* - * The reason we need to do this is to be able to support readv and writev - * that are larger than (pvfs_bufmap_size_query()) Default is - * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB. What that means is that we will - * create a new io vec descriptor for those memory addresses that - * go beyond the limit. Return value for this routine is negative in case - * of errors and 0 in case of success. - * - * Further, the new_nr_segs pointer is updated to hold the new value - * of number of iovecs, the new_vec pointer is updated to hold the pointer - * to the new split iovec, and the size array is an array of integers holding - * the number of iovecs that straddle pvfs_bufmap_size_query(). - * The max_new_nr_segs value is computed by the caller and returned. - * (It will be (count of all iov_len/ block_size) + 1). 
- */ -static int split_iovecs(unsigned long max_new_nr_segs, /* IN */ - unsigned long nr_segs, /* IN */ - const struct iovec *original_iovec, /* IN */ - unsigned long *new_nr_segs, /* OUT */ - struct iovec **new_vec, /* OUT */ - unsigned long *seg_count, /* OUT */ - unsigned long **seg_array) /* OUT */ -{ - unsigned long seg; - unsigned long count = 0; - unsigned long begin_seg; - unsigned long tmpnew_nr_segs = 0; - struct iovec *new_iovec = NULL; - struct iovec *orig_iovec; - unsigned long *sizes = NULL; - unsigned long sizes_count = 0; - - if (nr_segs <= 0 || - original_iovec == NULL || - new_nr_segs == NULL || - new_vec == NULL || - seg_count == NULL || - seg_array == NULL || - max_new_nr_segs <= 0) { - gossip_err("Invalid parameters to split_iovecs\n"); - return -EINVAL; - } - *new_nr_segs = 0; - *new_vec = NULL; - *seg_count = 0; - *seg_array = NULL; - /* copy the passed in iovec descriptor to a temp structure */ - orig_iovec = kmalloc_array(nr_segs, - sizeof(*orig_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (orig_iovec == NULL) { - gossip_err( - "split_iovecs: Could not allocate memory for %lu bytes!\n", - (unsigned long)(nr_segs * sizeof(*orig_iovec))); - return -ENOMEM; - } - new_iovec = kcalloc(max_new_nr_segs, - sizeof(*new_iovec), - PVFS2_BUFMAP_GFP_FLAGS); - if (new_iovec == NULL) { - kfree(orig_iovec); - gossip_err( - "split_iovecs: Could not allocate memory for %lu bytes!\n", - (unsigned long)(max_new_nr_segs * sizeof(*new_iovec))); - return -ENOMEM; - } - sizes = kcalloc(max_new_nr_segs, - sizeof(*sizes), - PVFS2_BUFMAP_GFP_FLAGS); - if (sizes == NULL) { - kfree(new_iovec); - kfree(orig_iovec); - gossip_err( - "split_iovecs: Could not allocate memory for %lu bytes!\n", - (unsigned long)(max_new_nr_segs * sizeof(*sizes))); - return -ENOMEM; - } - /* copy the passed in iovec to a temp structure */ - memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec)); - begin_seg = 0; -repeat: - for (seg = begin_seg; seg < nr_segs; seg++) { - if 
(tmpnew_nr_segs >= max_new_nr_segs || - sizes_count >= max_new_nr_segs) { - kfree(sizes); - kfree(orig_iovec); - kfree(new_iovec); - gossip_err - ("split_iovecs: exceeded the index limit (%lu)\n", - tmpnew_nr_segs); - return -EINVAL; - } - if (count + orig_iovec[seg].iov_len < - pvfs_bufmap_size_query()) { - count += orig_iovec[seg].iov_len; - memcpy(&new_iovec[tmpnew_nr_segs], - &orig_iovec[seg], - sizeof(*new_iovec)); - tmpnew_nr_segs++; - sizes[sizes_count]++; - } else { - new_iovec[tmpnew_nr_segs].iov_base = - orig_iovec[seg].iov_base; - new_iovec[tmpnew_nr_segs].iov_len = - (pvfs_bufmap_size_query() - count); - tmpnew_nr_segs++; - sizes[sizes_count]++; - sizes_count++; - begin_seg = seg; - orig_iovec[seg].iov_base += - (pvfs_bufmap_size_query() - count); - orig_iovec[seg].iov_len -= - (pvfs_bufmap_size_query() - count); - count = 0; - break; - } - } - if (seg != nr_segs) - goto repeat; - else - sizes_count++; - - *new_nr_segs = tmpnew_nr_segs; - /* new_iovec is freed by the caller */ - *new_vec = new_iovec; - *seg_count = sizes_count; - /* seg_array is also freed by the caller */ - *seg_array = sizes; - kfree(orig_iovec); - return 0; -} - -static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs, - ssize_t *total_count) -{ - unsigned long i; - long max_nr_iovecs; - ssize_t total; - ssize_t count; - - total = 0; - count = 0; - max_nr_iovecs = 0; - for (i = 0; i < nr_segs; i++) { - const struct iovec *iv = &curr[i]; - - count += iv->iov_len; - if (unlikely((ssize_t) (count | iv->iov_len) < 0)) - return -EINVAL; - if (total + iv->iov_len < pvfs_bufmap_size_query()) { - total += iv->iov_len; - max_nr_iovecs++; - } else { - total = - (total + iv->iov_len - pvfs_bufmap_size_query()); - max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2); - } - } - *total_count = count; - return max_nr_iovecs; -} - /* * Common entry point for read/write/readv/writev * This function will dispatch it to either the direct I/O @@ -431,25 +269,10 @@ static ssize_t 
do_readv_writev(enum PVFS_io_type type, struct file *file, struct inode *inode = file->f_mapping->host; struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; - ssize_t ret; - ssize_t total_count; - unsigned int to_free; - size_t count; - unsigned long seg; - unsigned long new_nr_segs; - unsigned long max_new_nr_segs; - unsigned long seg_count; - unsigned long *seg_array; - struct iovec *iovecptr; - struct iovec *ptr; - - total_count = 0; - ret = -EINVAL; - count = 0; - to_free = 0; - - /* Compute total and max number of segments after split */ - max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count); + struct iov_iter iter; + size_t count = iov_length(iov, nr_segs); + ssize_t total_count = 0; + ssize_t ret = -EINVAL; gossip_debug(GOSSIP_FILE_DEBUG, "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", @@ -472,93 +295,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, goto out; } - /* - * if the total size of data transfer requested is greater than - * the kernel-set blocksize of PVFS2, then we split the iovecs - * such that no iovec description straddles a block size limit - */ + iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, + iov, nr_segs, count); - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: pvfs_bufmap_size:%d\n", - __func__, - pvfs_bufmap_size_query()); - - if (count > pvfs_bufmap_size_query()) { - /* - * Split up the given iovec description such that - * no iovec descriptor straddles over the block-size limitation. - * This makes us our job easier to stage the I/O. - * In addition, this function will also compute an array - * with seg_count entries that will store the number of - * segments that straddle the block-size boundaries. 
- */ - ret = split_iovecs(max_new_nr_segs, /* IN */ - nr_segs, /* IN */ - iov, /* IN */ - &new_nr_segs, /* OUT */ - &iovecptr, /* OUT */ - &seg_count, /* OUT */ - &seg_array); /* OUT */ - if (ret < 0) { - gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n", - __func__, - ret); - goto out; - } - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: Splitting iovecs from %lu to %lu" - " [max_new %lu]\n", - __func__, - nr_segs, - new_nr_segs, - max_new_nr_segs); - /* We must free seg_array and iovecptr */ - to_free = 1; - } else { - new_nr_segs = nr_segs; - /* use the given iovec description */ - iovecptr = (struct iovec *)iov; - /* There is only 1 element in the seg_array */ - seg_count = 1; - /* and its value is the number of segments passed in */ - seg_array = &nr_segs; - /* We dont have to free up anything */ - to_free = 0; - } - ptr = iovecptr; - - gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU) %zd@%llu\n", - __func__, - handle, - count, - llu(*offset)); - gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n", - __func__, - handle, - new_nr_segs, seg_count); - -/* PVFS2_KERNEL_DEBUG is a CFLAGS define. */ -#ifdef PVFS2_KERNEL_DEBUG - for (seg = 0; seg < new_nr_segs; seg++) - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: %d) %p to %p [%d bytes]\n", - __func__, - (int)seg + 1, - iovecptr[seg].iov_base, - iovecptr[seg].iov_base + iovecptr[seg].iov_len, - (int)iovecptr[seg].iov_len); - for (seg = 0; seg < seg_count; seg++) - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: %zd) %lu\n", - __func__, - seg + 1, - seg_array[seg]); -#endif - seg = 0; while (total_count < count) { - struct iov_iter iter; size_t each_count; size_t amt_complete; @@ -579,9 +319,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)*offset); - iov_iter_init(&iter, type == PVFS_IO_READ ? 
READ : WRITE, - ptr, seg_array[seg], each_count); - ret = wait_for_direct_io(type, inode, offset, &iter, each_count, 0); gossip_debug(GOSSIP_FILE_DEBUG, @@ -593,9 +330,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (ret < 0) goto out; - /* advance the iovec pointer */ - ptr += seg_array[seg]; - seg++; *offset += ret; total_count += ret; amt_complete = ret; @@ -617,10 +351,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (total_count > 0) ret = total_count; out: - if (to_free) { - kfree(iovecptr); - kfree(seg_array); - } if (ret > 0) { if (type == PVFS_IO_READ) { file_accessed(file); From 0071ed1ec663fa87a3a8ae18f6d0812db010a343 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 18:22:08 -0400 Subject: [PATCH 031/174] orangefs: make do_readv_writev() take iov_iter no need to build a copy of what the caller already has; what's more, we want the one given to caller properly advanced *and* we shouldn't depend upon it being an iovec-backed one. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index ff7fe37f5a22..8dae04dc9df4 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -264,13 +264,12 @@ out: * Note: File extended attributes override any mount options. 
*/ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, - loff_t *offset, const struct iovec *iov, unsigned long nr_segs) + loff_t *offset, struct iov_iter *iter) { struct inode *inode = file->f_mapping->host; struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; - struct iov_iter iter; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); ssize_t total_count = 0; ssize_t ret = -EINVAL; @@ -295,18 +294,13 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, goto out; } - iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, - iov, nr_segs, count); - - while (total_count < count) { - size_t each_count; + while (iov_iter_count(iter)) { + size_t each_count = iov_iter_count(iter); size_t amt_complete; /* how much to transfer in this loop iteration */ - each_count = - (((count - total_count) > pvfs_bufmap_size_query()) ? - pvfs_bufmap_size_query() : - (count - total_count)); + if (each_count > pvfs_bufmap_size_query()) + each_count = pvfs_bufmap_size_query(); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): size of each_count(%d)\n", @@ -319,7 +313,7 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)*offset); - ret = wait_for_direct_io(type, inode, offset, &iter, + ret = wait_for_direct_io(type, inode, offset, iter, each_count, 0); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): return from wait_for_io:%d\n", @@ -426,7 +420,6 @@ static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; loff_t pos = *(&iocb->ki_pos); ssize_t rc = 0; - unsigned long nr_segs = iter->nr_segs; BUG_ON(iocb->private); @@ -434,11 +427,7 @@ static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) g_pvfs2_stats.reads++; - rc = do_readv_writev(PVFS_IO_READ, - file, - &pos, - iter->iov, - nr_segs); + rc = do_readv_writev(PVFS_IO_READ, file, &pos, iter); 
iocb->ki_pos = pos; return rc; @@ -448,7 +437,6 @@ static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; loff_t pos = *(&iocb->ki_pos); - unsigned long nr_segs = iter->nr_segs; ssize_t rc; BUG_ON(iocb->private); @@ -482,8 +470,7 @@ static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) rc = do_readv_writev(PVFS_IO_WRITE, file, &pos, - iter->iov, - nr_segs); + iter); if (rc < 0) { gossip_err("%s: do_readv_writev failed, rc:%zd:.\n", __func__, rc); From 74f68fce2a395a188d454a488ea167affa4d7cf5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 18:31:05 -0400 Subject: [PATCH 032/174] orangefs: make pvfs2_inode_read() take iov_iter ... and make the only caller use page-backed iov_iter, getting rid of kmap/kunmap *and* of the bug with attempted use of iovec-backed copy_page_to_iter() on a kernel pointer. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 12 +++--------- fs/orangefs/inode.c | 17 +++++++---------- fs/orangefs/pvfs2-kernel.h | 3 +-- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 8dae04dc9df4..78d296bb870e 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -369,22 +369,17 @@ out: * Data may be placed either in a user or kernel buffer. 
*/ ssize_t pvfs2_inode_read(struct inode *inode, - char __user *buf, - size_t count, + struct iov_iter *iter, loff_t *offset, loff_t readahead_size) { struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + size_t count = iov_iter_count(iter); size_t bufmap_size; - struct iovec vec; - struct iov_iter iter; ssize_t ret = -EINVAL; g_pvfs2_stats.reads++; - vec.iov_base = buf; - vec.iov_len = count; - bufmap_size = pvfs_bufmap_size_query(); if (count > bufmap_size) { gossip_debug(GOSSIP_FILE_DEBUG, @@ -400,8 +395,7 @@ ssize_t pvfs2_inode_read(struct inode *inode, count, llu(*offset)); - iov_iter_init(&iter, READ, &vec, 1, count); - ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, &iter, + ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, iter, count, readahead_size); if (ret > 0) *offset += ret; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 4f7c45a44c1f..70d1c1925ea3 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -14,18 +14,20 @@ static int read_one_page(struct page *page) { - void *page_data; int ret; int max_block; ssize_t bytes_read = 0; struct inode *inode = page->mapping->host; const __u32 blocksize = PAGE_CACHE_SIZE; /* inode->i_blksize */ const __u32 blockbits = PAGE_CACHE_SHIFT; /* inode->i_blkbits */ + struct iov_iter to; + struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE}; + + iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE); gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_readpage called with page %p\n", page); - page_data = pvfs2_kmap(page); max_block = ((inode->i_size / blocksize) + 1); @@ -33,16 +35,12 @@ static int read_one_page(struct page *page) loff_t blockptr_offset = (((loff_t) page->index) << blockbits); bytes_read = pvfs2_inode_read(inode, - (char __user *) page_data, - blocksize, + &to, &blockptr_offset, inode->i_size); } - /* only zero remaining unread portions of the page data */ - if (bytes_read > 0) - memset(page_data + bytes_read, 0, blocksize - bytes_read); - else - memset(page_data, 0, 
blocksize); + /* this will only zero remaining unread portions of the page data */ + iov_iter_zero(~0U, &to); /* takes care of potential aliasing */ flush_dcache_page(page); if (bytes_read < 0) { @@ -54,7 +52,6 @@ static int read_one_page(struct page *page) ClearPageError(page); ret = 0; } - pvfs2_kunmap(page); /* unlock the page after the ->readpage() routine completes */ unlock_page(page); return ret; diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 29b4a48b3a25..916a35513419 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -605,8 +605,7 @@ struct inode *pvfs2_iget(struct super_block *sb, struct pvfs2_object_kref *ref); ssize_t pvfs2_inode_read(struct inode *inode, - char __user *buf, - size_t count, + struct iov_iter *iter, loff_t *offset, loff_t readahead_size); From a0435ca18efe3e052393c2866a755f9ca1902268 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 20:09:20 -0400 Subject: [PATCH 033/174] orangefs: kill kmap/kunmap wrappers Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-kernel.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 916a35513419..16df1d5aa879 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -185,9 +185,6 @@ struct client_debug_mask { #define PVFS2_GFP_FLAGS (GFP_KERNEL) #define PVFS2_BUFMAP_GFP_FLAGS (GFP_KERNEL) -#define pvfs2_kmap(page) kmap(page) -#define pvfs2_kunmap(page) kunmap(page) - /* pvfs2 xattr and acl related defines */ #define PVFS2_XATTR_INDEX_POSIX_ACL_ACCESS 1 #define PVFS2_XATTR_INDEX_POSIX_ACL_DEFAULT 2 From 16742f2d7c1004bea5222a19428196b7125a41d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 20:10:00 -0400 Subject: [PATCH 034/174] orangefs: use get_user_pages_fast(), not get_user_pages() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-bufmap.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 
deletions(-) diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index dacf42bee196..c7b0f3560734 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -171,16 +171,8 @@ pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap, int offset = 0, ret, i; /* map the pages */ - down_write(&current->mm->mmap_sem); - ret = get_user_pages(current, - current->mm, - (unsigned long)user_desc->ptr, - bufmap->page_count, - 1, - 0, - bufmap->page_array, - NULL); - up_write(&current->mm->mmap_sem); + ret = get_user_pages_fast((unsigned long)user_desc->ptr, + bufmap->page_count, 1, bufmap->page_array); if (ret < 0) return ret; From b05a7851095c24ff62d5ffeb81baeffe7acd26a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 20:18:00 -0400 Subject: [PATCH 035/174] orangefs: double iput() in case of d_make_root() failure Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 9dee95293599..833af68c2227 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -403,10 +403,8 @@ static int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) /* allocates and places root dentry in dcache */ root_dentry = d_make_root(root); - if (!root_dentry) { - iput(root); + if (!root_dentry) return -ENOMEM; - } sb->s_export_op = &pvfs2_export_ops; sb->s_root = root_dentry; From 5c0dbbc64b25fde6a4e29c545ac2296fc5194b3f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 20:22:43 -0400 Subject: [PATCH 036/174] orangefs: kill struct pvfs2_mount_sb_info_s The only reason for that thing used to be the API of mount_nodev() callback; since we are calling pvfs2_fill_sb() ourselves now, we don't have to shove everything into a single structure. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-kernel.h | 12 ------------ fs/orangefs/super.c | 27 +++++++++------------------ 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 16df1d5aa879..c36868be03dc 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -364,18 +364,6 @@ struct pvfs2_sb_info_s { struct list_head list; }; -/* - * a temporary structure used only for sb mount time that groups the - * mount time data provided along with a private superblock structure - * that is allocated before a 'kernel' superblock is allocated. -*/ -struct pvfs2_mount_sb_info_s { - void *data; - struct pvfs2_khandle root_khandle; - __s32 fs_id; - int id; -}; - /* * structure that holds the state of any async I/O operation issued * through the VFS. Needed especially to handle cancellation requests diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 833af68c2227..f29e7cccdfd1 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -347,13 +347,13 @@ static struct export_operations pvfs2_export_ops = { .fh_to_dentry = pvfs2_fh_to_dentry, }; -static int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) +static int pvfs2_fill_sb(struct super_block *sb, + struct pvfs2_fs_mount_response *fs_mount, + void *data, int silent) { int ret = -EINVAL; struct inode *root = NULL; struct dentry *root_dentry = NULL; - struct pvfs2_mount_sb_info_s *mount_sb_info = - (struct pvfs2_mount_sb_info_s *) data; struct pvfs2_object_kref root_object; /* alloc and init our private pvfs2 sb info */ @@ -364,13 +364,12 @@ static int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) memset(sb->s_fs_info, 0, sizeof(struct pvfs2_sb_info_s)); PVFS2_SB(sb)->sb = sb; - PVFS2_SB(sb)->root_khandle = mount_sb_info->root_khandle; - PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id; - PVFS2_SB(sb)->id = mount_sb_info->id; + PVFS2_SB(sb)->root_khandle = 
fs_mount->root_khandle; + PVFS2_SB(sb)->fs_id = fs_mount->fs_id; + PVFS2_SB(sb)->id = fs_mount->id; - if (mount_sb_info->data) { - ret = parse_mount_options(sb, mount_sb_info->data, - silent); + if (data) { + ret = parse_mount_options(sb, data, silent); if (ret) return ret; } @@ -419,7 +418,6 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, int ret = -EINVAL; struct super_block *sb = ERR_PTR(-EINVAL); struct pvfs2_kernel_op_s *new_op; - struct pvfs2_mount_sb_info_s mount_sb_info; struct dentry *d = ERR_PTR(-EINVAL); gossip_debug(GOSSIP_SUPER_DEBUG, @@ -455,13 +453,6 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, goto free_op; } - /* fill in temporary structure passed to fill_sb method */ - mount_sb_info.data = data; - mount_sb_info.root_khandle = - new_op->downcall.resp.fs_mount.root_khandle; - mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id; - mount_sb_info.id = new_op->downcall.resp.fs_mount.id; - sb = sget(fst, NULL, set_anon_super, flags, NULL); if (IS_ERR(sb)) { @@ -470,7 +461,7 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, } ret = pvfs2_fill_sb(sb, - (void *)&mount_sb_info, + &new_op->downcall.resp.fs_mount, data, flags & MS_SILENT ? 
1 : 0); if (ret) { From 75992b0fa95a667d8f436962ea6a694fe992c001 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 20:27:40 -0400 Subject: [PATCH 037/174] pvfs2_fill_sb(): use kzalloc() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index f29e7cccdfd1..45db0772a767 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -358,10 +358,9 @@ static int pvfs2_fill_sb(struct super_block *sb, /* alloc and init our private pvfs2 sb info */ sb->s_fs_info = - kmalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS); + kzalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS); if (!PVFS2_SB(sb)) return -ENOMEM; - memset(sb->s_fs_info, 0, sizeof(struct pvfs2_sb_info_s)); PVFS2_SB(sb)->sb = sb; PVFS2_SB(sb)->root_khandle = fs_mount->root_khandle; From aada5c5872aa3048980a0e6926efd9086dc98532 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 20:35:36 -0400 Subject: [PATCH 038/174] orangefs: kill pointless ->link() and ->mknod() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/namei.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 05f6feadfd0d..39f96ace0289 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -242,27 +242,6 @@ static int pvfs2_unlink(struct inode *dir, struct dentry *dentry) return ret; } -/* - * PVFS2 does not support hard links. - */ -static int pvfs2_link(struct dentry *old_dentry, - struct inode *dir, - struct dentry *dentry) -{ - return -EPERM; -} - -/* - * PVFS2 does not support special files. 
- */ -static int pvfs2_mknod(struct inode *dir, - struct dentry *dentry, - umode_t mode, - dev_t rdev) -{ - return -EPERM; -} - static int pvfs2_symlink(struct inode *dir, struct dentry *dentry, const char *symname) @@ -453,12 +432,10 @@ struct inode_operations pvfs2_dir_inode_operations = { .get_acl = pvfs2_get_acl, .set_acl = pvfs2_set_acl, .create = pvfs2_create, - .link = pvfs2_link, .unlink = pvfs2_unlink, .symlink = pvfs2_symlink, .mkdir = pvfs2_mkdir, .rmdir = pvfs2_unlink, - .mknod = pvfs2_mknod, .rename = pvfs2_rename, .setattr = pvfs2_setattr, .getattr = pvfs2_getattr, From 5714156be232b088e24c74fe4e95cb900a8b12e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Oct 2015 22:02:00 -0400 Subject: [PATCH 039/174] orangefs: sanitize pvfs2_convert_time_field() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-kernel.h | 5 ++++- fs/orangefs/pvfs2-utils.c | 13 ++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index c36868be03dc..e96251717966 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -638,7 +638,10 @@ int pvfs2_unmount_sb(struct super_block *sb); int pvfs2_cancel_op_in_progress(__u64 tag); -__u64 pvfs2_convert_time_field(void *time_ptr); +static inline __u64 pvfs2_convert_time_field(const struct timespec *ts) +{ + return (__u64)ts->tv_sec; +} int pvfs2_normalize_to_errno(__s32 error_code); diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index c33e7193599c..1180a2480d2b 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -298,7 +298,7 @@ static inline int copy_attributes_from_inode(struct inode *inode, attrs->mask |= PVFS_ATTR_SYS_ATIME; if (iattr->ia_valid & ATTR_ATIME_SET) { attrs->atime = - pvfs2_convert_time_field((void *)&iattr->ia_atime); + pvfs2_convert_time_field(&iattr->ia_atime); attrs->mask |= PVFS_ATTR_SYS_ATIME_SET; } } @@ -306,7 +306,7 @@ static inline int 
copy_attributes_from_inode(struct inode *inode, attrs->mask |= PVFS_ATTR_SYS_MTIME; if (iattr->ia_valid & ATTR_MTIME_SET) { attrs->mtime = - pvfs2_convert_time_field((void *)&iattr->ia_mtime); + pvfs2_convert_time_field(&iattr->ia_mtime); attrs->mask |= PVFS_ATTR_SYS_MTIME_SET; } } @@ -653,15 +653,6 @@ void set_signals(sigset_t *sigset) sigprocmask(SIG_SETMASK, sigset, NULL); } -__u64 pvfs2_convert_time_field(void *time_ptr) -{ - __u64 pvfs2_time; - struct timespec *tspec = (struct timespec *)time_ptr; - - pvfs2_time = (__u64) ((time_t) tspec->tv_sec); - return pvfs2_time; -} - /* * The following is a very dirty hack that is now a permanent part of the * PVFS2 protocol. See protocol.h for more error definitions. From ef4af94edcf8fc32ab1d4141537a4eb29ff45a40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Oct 2015 13:23:16 -0400 Subject: [PATCH 040/174] orangefs: switch decode_dirents() to use of kcalloc() gets rid of multiplication overflow Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index daf497384501..280755db1814 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -27,7 +27,7 @@ static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) readdir->token = rd->token; readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; - readdir->dirent_array = kmalloc(readdir->pvfs_dirent_outcount * + readdir->dirent_array = kcalloc(readdir->pvfs_dirent_outcount, sizeof(*readdir->dirent_array), GFP_KERNEL); if (readdir->dirent_array == NULL) From 9be68b08719c10cc3cc9305e7b2452475a9dcacd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Oct 2015 17:43:15 -0400 Subject: [PATCH 041/174] orangefs: get rid of dec_string and enc_string The latter is never used, the former has one user and would be better off spelled out right there. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 15 ++++++++------- fs/orangefs/pvfs2-dev-proto.h | 16 ---------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 280755db1814..eb4c3d334088 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -23,7 +23,6 @@ static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) struct pvfs2_readdir_response_s *rd = (struct pvfs2_readdir_response_s *) ptr; char *buf = ptr; - char **pptr = &buf; readdir->token = rd->token; readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; @@ -32,15 +31,17 @@ static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) GFP_KERNEL); if (readdir->dirent_array == NULL) return -ENOMEM; - *pptr += offsetof(struct pvfs2_readdir_response_s, dirent_array); + buf += offsetof(struct pvfs2_readdir_response_s, dirent_array); for (i = 0; i < readdir->pvfs_dirent_outcount; i++) { - dec_string(pptr, &readdir->dirent_array[i].d_name, - &readdir->dirent_array[i].d_length); + __u32 len = *(__u32 *)buf; + readdir->dirent_array[i].d_name = buf + 4; + buf += roundup8(4 + len + 1); + readdir->dirent_array[i].d_length = len; readdir->dirent_array[i].khandle = - *(struct pvfs2_khandle *) *pptr; - *pptr += 16; + *(struct pvfs2_khandle *) buf; + buf += 16; } - return (unsigned long)*pptr - (unsigned long)ptr; + return buf - ptr; } static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/pvfs2-dev-proto.h index 9c82e6e651f3..68b1bc6e57b4 100644 --- a/fs/orangefs/pvfs2-dev-proto.h +++ b/fs/orangefs/pvfs2-dev-proto.h @@ -78,22 +78,6 @@ #define roundup8(x) (((x)+7) & ~7) #endif -/* strings; decoding just points into existing character data */ -#define enc_string(pptr, pbuf) do { \ - __u32 len = strlen(*pbuf); \ - *(__u32 *) *(pptr) = (len); \ - memcpy(*(pptr)+4, *pbuf, len+1); \ - *(pptr) += roundup8(4 + len + 
1); \ -} while (0) - -#define dec_string(pptr, pbuf, plen) do { \ - __u32 len = (*(__u32 *) *(pptr)); \ - *pbuf = *(pptr) + 4; \ - *(pptr) += roundup8(4 + len + 1); \ - if (plen) \ - *plen = len;\ -} while (0) - struct read_write_x { __s64 off; __s64 len; From ade1d48b788996e05fb9914dfb62993b1c279357 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Oct 2015 17:51:36 -0400 Subject: [PATCH 042/174] orangefs: don't leave uninitialized data in ->trailer_buf minimal fix; it would be better to reject such requests outright. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index ede842f05b62..7a719900235f 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -352,18 +352,20 @@ static ssize_t pvfs2_devreq_writev(struct file *file, * to reset trailer size on op errors. */ if (op->downcall.status == 0 && op->downcall.trailer_size > 0) { + __u64 trailer_size = op->downcall.trailer_size; + size_t size; gossip_debug(GOSSIP_DEV_DEBUG, "writev: trailer size %ld\n", - (unsigned long)op->downcall.trailer_size); + (unsigned long)size); if (count != (notrailer_count + 1)) { - gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%zu)\n", (unsigned long)op->downcall.trailer_size, count); + gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? 
(%zu)\n", (unsigned long)trailer_size, count); dev_req_release(buffer); put_op(op); return -EPROTO; } - if (iov[notrailer_count].iov_len > - op->downcall.trailer_size) { - gossip_err("writev error: trailer size (%ld) != iov_len (%ld)\n", (unsigned long)op->downcall.trailer_size, (unsigned long)iov[notrailer_count].iov_len); + size = iov[notrailer_count].iov_len; + if (size > trailer_size) { + gossip_err("writev error: trailer size (%ld) != iov_len (%zd)\n", (unsigned long)trailer_size, size); dev_req_release(buffer); put_op(op); return -EMSGSIZE; @@ -371,16 +373,14 @@ static ssize_t pvfs2_devreq_writev(struct file *file, /* Allocate a buffer large enough to hold the * trailer bytes. */ - op->downcall.trailer_buf = - vmalloc(op->downcall.trailer_size); + op->downcall.trailer_buf = vmalloc(trailer_size); if (op->downcall.trailer_buf != NULL) { gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", op->downcall.trailer_buf); ret = copy_from_user(op->downcall.trailer_buf, iov[notrailer_count]. iov_base, - iov[notrailer_count]. 
- iov_len); + size); if (ret) { gossip_err("Failed to copy trailer data from user space\n"); dev_req_release(buffer); @@ -392,6 +392,8 @@ static ssize_t pvfs2_devreq_writev(struct file *file, put_op(op); return -EIO; } + memset(op->downcall.trailer_buf + size, 0, + trailer_size - size); } else { /* Change downcall status */ op->downcall.status = -ENOMEM; From 8092895f759ede31634d0f0fc85a74d970552c49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Oct 2015 18:11:10 -0400 Subject: [PATCH 043/174] orangefs: validate the response in decode_dirents() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index eb4c3d334088..3049cd61b700 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -17,13 +17,17 @@ struct readdir_handle_s { /* * decode routine needed by kmod to make sense of the shared page for readdirs. */ -static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) +static long decode_dirents(char *ptr, size_t size, + struct pvfs2_readdir_response_s *readdir) { int i; struct pvfs2_readdir_response_s *rd = (struct pvfs2_readdir_response_s *) ptr; char *buf = ptr; + if (size < offsetof(struct pvfs2_readdir_response_s, dirent_array)) + return -EINVAL; + readdir->token = rd->token; readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; readdir->dirent_array = kcalloc(readdir->pvfs_dirent_outcount, @@ -31,21 +35,43 @@ static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) GFP_KERNEL); if (readdir->dirent_array == NULL) return -ENOMEM; + buf += offsetof(struct pvfs2_readdir_response_s, dirent_array); + size -= offsetof(struct pvfs2_readdir_response_s, dirent_array); + for (i = 0; i < readdir->pvfs_dirent_outcount; i++) { - __u32 len = *(__u32 *)buf; + __u32 len; + + if (size < 4) + goto Einval; + + len = *(__u32 *)buf; + if (len >= (unsigned)-24) + 
goto Einval; + readdir->dirent_array[i].d_name = buf + 4; - buf += roundup8(4 + len + 1); readdir->dirent_array[i].d_length = len; + + len = roundup8(4 + len + 1); + if (size < len + 16) + goto Einval; + size -= len + 16; + + buf += len; + readdir->dirent_array[i].khandle = *(struct pvfs2_khandle *) buf; buf += 16; } return buf - ptr; +Einval: + kfree(readdir->dirent_array); + readdir->dirent_array = NULL; + return -EINVAL; } static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, - int buffer_index) + size_t size, int buffer_index) { long ret; @@ -61,7 +87,7 @@ static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, } rhandle->buffer_index = buffer_index; rhandle->dents_buf = buf; - ret = decode_dirents(buf, &rhandle->readdir_response); + ret = decode_dirents(buf, size, &rhandle->readdir_response); if (ret < 0) { gossip_err("Could not decode readdir from buffer %ld\n", ret); rhandle->buffer_index = -1; @@ -209,6 +235,7 @@ get_new_buffer_index: bytes_decoded = readdir_handle_ctor(&rhandle, new_op->downcall.trailer_buf, + new_op->downcall.trailer_size, buffer_index); if (bytes_decoded < 0) { gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n", From 3f1b6947dcfa76de0b690022dcf3ed8814744aa7 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 13 Nov 2015 13:05:11 -0500 Subject: [PATCH 044/174] Orangefs: set pos after generic_write_checks if we are appending, generic_write_checks would have updated pos to the end of the file... 
Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 78d296bb870e..78a46968a994 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -430,7 +430,7 @@ static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - loff_t pos = *(&iocb->ki_pos); + loff_t pos; ssize_t rc; BUG_ON(iocb->private); @@ -461,6 +461,13 @@ static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out; } + /* + * if we are appending, generic_write_checks would have updated + * pos to the end of the file, so we will wait till now to set + * pos... + */ + pos = *(&iocb->ki_pos); + rc = do_readv_writev(PVFS_IO_WRITE, file, &pos, From 6d0dd7684cc8d010ab3082db572534dffd2ad42d Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 7 Oct 2015 13:40:54 -0400 Subject: [PATCH 045/174] Orangefs: Remove unused #defines from signal blocking code. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/pvfs2-kernel.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index e96251717966..8b7d57118f9e 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -806,8 +806,6 @@ do { \ #define pvfs2_lock_inode(inode) spin_lock(&inode->i_lock) #define pvfs2_unlock_inode(inode) spin_unlock(&inode->i_lock) -#define pvfs2_current_signal_lock current->sighand->siglock -#define pvfs2_current_sigaction current->sighand->action #define fill_default_sys_attrs(sys_attr, type, mode) \ do { \ From b5bbc84328556bb653412b8e9682b8fdb091866a Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 13 Nov 2015 14:39:15 -0500 Subject: [PATCH 046/174] Orangefs: fix gossip statement Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index 7a719900235f..dbf52ab1e569 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -356,7 +356,7 @@ static ssize_t pvfs2_devreq_writev(struct file *file, size_t size; gossip_debug(GOSSIP_DEV_DEBUG, "writev: trailer size %ld\n", - (unsigned long)size); + (unsigned long)trailer_size); if (count != (notrailer_count + 1)) { gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%zu)\n", (unsigned long)trailer_size, count); dev_req_release(buffer); From f0ed4418d46db587eca981065ef5014332678606 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Fri, 13 Nov 2015 14:26:09 -0500 Subject: [PATCH 047/174] Orangefs: Remove upcall trailers which are not used. Also removes remnants of iox (readx/writex) which previously used trailers, but no longer exist. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 117 ++++++++++------------------------ fs/orangefs/downcall.h | 5 -- fs/orangefs/pvfs2-cache.c | 16 +---- fs/orangefs/pvfs2-dev-proto.h | 1 - fs/orangefs/pvfs2-kernel.h | 14 ---- fs/orangefs/upcall.h | 11 +--- 6 files changed, 36 insertions(+), 128 deletions(-) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index dbf52ab1e569..34e2240f1d29 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -139,20 +139,6 @@ static ssize_t pvfs2_devreq_read(struct file *file, cur_op = op; spin_lock(&cur_op->lock); list_del(&cur_op->list); - cur_op->op_linger_tmp--; - /* - * if there is a trailer, re-add it to - * the request list. - */ - if (cur_op->op_linger == 2 && - cur_op->op_linger_tmp == 1) { - if (cur_op->upcall.trailer_size <= 0 || - cur_op->upcall.trailer_buf == NULL) - gossip_err("BUG:trailer_size is %ld and trailer buf is %p\n", (long)cur_op->upcall.trailer_size, cur_op->upcall.trailer_buf); - /* re-add it to the head of the list */ - list_add(&cur_op->list, - &pvfs2_request_list); - } spin_unlock(&cur_op->lock); break; } @@ -167,11 +153,8 @@ static ssize_t pvfs2_devreq_read(struct file *file, "client-core: reading op tag %llu %s\n", llu(cur_op->tag), get_opname_string(cur_op)); if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { - if (cur_op->op_linger == 1) - gossip_err("WARNING: Current op already queued...skipping\n"); - } else if (cur_op->op_linger == 1 || - (cur_op->op_linger == 2 && - cur_op->op_linger_tmp == 0)) { + gossip_err("WARNING: Current op already queued...skipping\n"); + } else { /* * atomically move the operation to the * htable_ops_in_progress @@ -182,71 +165,40 @@ static ssize_t pvfs2_devreq_read(struct file *file, spin_unlock(&cur_op->lock); - /* 2 cases - * a) OPs with no trailers - * b) OPs with trailers, Stage 1 - * Either way push the upcall out - */ - if (cur_op->op_linger == 1 || - 
(cur_op->op_linger == 2 && cur_op->op_linger_tmp == 1)) { - len = MAX_ALIGNED_DEV_REQ_UPSIZE; - if ((size_t) len <= count) { - ret = copy_to_user(buf, - &proto_ver, - sizeof(__s32)); + /* Push the upcall out */ + len = MAX_ALIGNED_DEV_REQ_UPSIZE; + if ((size_t) len <= count) { + ret = copy_to_user(buf, + &proto_ver, + sizeof(__s32)); + if (ret == 0) { + ret = copy_to_user(buf + sizeof(__s32), + &magic, + sizeof(__s32)); + if (ret == 0) { + ret = copy_to_user(buf+2 * sizeof(__s32), + &cur_op->tag, + sizeof(__u64)); if (ret == 0) { - ret = copy_to_user(buf + sizeof(__s32), - &magic, - sizeof(__s32)); - if (ret == 0) { - ret = copy_to_user(buf+2 * sizeof(__s32), - &cur_op->tag, - sizeof(__u64)); - if (ret == 0) { - ret = copy_to_user( - buf + - 2 * - sizeof(__s32) + - sizeof(__u64), - &cur_op->upcall, - sizeof(struct pvfs2_upcall_s)); - } - } + ret = copy_to_user( + buf + + 2 * + sizeof(__s32) + + sizeof(__u64), + &cur_op->upcall, + sizeof(struct pvfs2_upcall_s)); } + } + } - if (ret) { - gossip_err("Failed to copy data to user space\n"); - len = -EFAULT; - } - } else { - gossip_err - ("Failed to copy data to user space\n"); - len = -EIO; - } - } - /* Stage 2: Push the trailer out */ - else if (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 0) { - len = cur_op->upcall.trailer_size; - if ((size_t) len <= count) { - ret = copy_to_user(buf, - cur_op->upcall.trailer_buf, - len); - if (ret) { - gossip_err("Failed to copy trailer to user space\n"); - len = -EFAULT; - } - } else { - gossip_err("Read buffer for trailer is too small (%ld as opposed to %ld)\n", - (long)count, - (long)len); - len = -EIO; - } + if (ret) { + gossip_err("Failed to copy data to user space\n"); + len = -EFAULT; + } } else { - gossip_err("cur_op: %p (op_linger %d), (op_linger_tmp %d), erroneous request list?\n", - cur_op, - cur_op->op_linger, - cur_op->op_linger_tmp); - len = 0; + gossip_err + ("Failed to copy data to user space\n"); + len = -EIO; } } else if (file->f_flags & O_NONBLOCK) { /* @@ 
-413,9 +365,8 @@ static ssize_t pvfs2_devreq_writev(struct file *file, * application reading/writing this device to return until * the buffers are done being used. */ - if ((op->upcall.type == PVFS2_VFS_OP_FILE_IO && - op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) || - op->upcall.type == PVFS2_VFS_OP_FILE_IOX) { + if (op->upcall.type == PVFS2_VFS_OP_FILE_IO && + op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) { int timed_out = 0; DECLARE_WAITQUEUE(wait_entry, current); diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h index f8bea46e7c6a..e372f446f6ba 100644 --- a/fs/orangefs/downcall.h +++ b/fs/orangefs/downcall.h @@ -19,10 +19,6 @@ struct pvfs2_io_response { __s64 amt_complete; }; -struct pvfs2_iox_response { - __s64 amt_complete; -}; - struct pvfs2_lookup_response { struct pvfs2_object_kref refn; }; @@ -111,7 +107,6 @@ struct pvfs2_downcall_s { union { struct pvfs2_io_response io; - struct pvfs2_iox_response iox; struct pvfs2_lookup_response lookup; struct pvfs2_create_response create; struct pvfs2_symlink_response sym; diff --git a/fs/orangefs/pvfs2-cache.c b/fs/orangefs/pvfs2-cache.c index 15251884ba4a..f982616a4349 100644 --- a/fs/orangefs/pvfs2-cache.c +++ b/fs/orangefs/pvfs2-cache.c @@ -103,13 +103,11 @@ char *get_opname_string(struct pvfs2_kernel_op_s *new_op) return "OP_FSYNC"; else if (type == PVFS2_VFS_OP_FSKEY) return "OP_FSKEY"; - else if (type == PVFS2_VFS_OP_FILE_IOX) - return "OP_FILE_IOX"; } return "OP_UNKNOWN?"; } -static struct pvfs2_kernel_op_s *op_alloc_common(__s32 op_linger, __s32 type) +struct pvfs2_kernel_op_s *op_alloc(__s32 type) { struct pvfs2_kernel_op_s *new_op = NULL; @@ -145,24 +143,12 @@ static struct pvfs2_kernel_op_s *op_alloc_common(__s32 op_linger, __s32 type) new_op->upcall.gid = from_kgid(current_user_ns(), current_fsgid()); - - new_op->op_linger = new_op->op_linger_tmp = op_linger; } else { gossip_err("op_alloc: kmem_cache_alloc failed!\n"); } return new_op; } -struct pvfs2_kernel_op_s *op_alloc(__s32 
type) -{ - return op_alloc_common(1, type); -} - -struct pvfs2_kernel_op_s *op_alloc_trailer(__s32 type) -{ - return op_alloc_common(2, type); -} - void op_release(struct pvfs2_kernel_op_s *pvfs2_op) { if (pvfs2_op) { diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/pvfs2-dev-proto.h index 68b1bc6e57b4..71ab56df4ad7 100644 --- a/fs/orangefs/pvfs2-dev-proto.h +++ b/fs/orangefs/pvfs2-dev-proto.h @@ -41,7 +41,6 @@ #define PVFS2_VFS_OP_FSYNC 0xFF00EE01 #define PVFS2_VFS_OP_FSKEY 0xFF00EE02 #define PVFS2_VFS_OP_READDIRPLUS 0xFF00EE03 -#define PVFS2_VFS_OP_FILE_IOX 0xFF00EE04 /* * Misc constants. Please retain them as multiples of 8! diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 8b7d57118f9e..ac90b6365fd3 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -279,19 +279,6 @@ struct pvfs2_kernel_op_s { int io_completed; wait_queue_head_t io_completion_waitq; - /* - * upcalls requiring variable length trailers require that this struct - * be in the request list even after client-core does a read() on the - * device to dequeue the upcall. - * if op_linger field goes to 0, we dequeue this op off the list. - * else we let it stay. What gets passed to the read() is - * a) if op_linger field is = 1, pvfs2_kernel_op_s itself - * b) else if = 0, we pass ->upcall.trailer_buf - * We expect to have only a single upcall trailer buffer, - * so we expect callers with trailers - * to set this field to 2 and others to set it to 1. 
- */ - __s32 op_linger, op_linger_tmp; /* VFS aio fields */ /* used by the async I/O code to stash the pvfs2_kiocb_s structure */ @@ -507,7 +494,6 @@ static inline int match_handle(struct pvfs2_khandle resp_handle, int op_cache_initialize(void); int op_cache_finalize(void); struct pvfs2_kernel_op_s *op_alloc(__s32 type); -struct pvfs2_kernel_op_s *op_alloc_trailer(__s32 type); char *get_opname_string(struct pvfs2_kernel_op_s *new_op); void op_release(struct pvfs2_kernel_op_s *op); diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h index 1e07f626aac6..0805778a8185 100644 --- a/fs/orangefs/upcall.h +++ b/fs/orangefs/upcall.h @@ -23,14 +23,6 @@ struct pvfs2_io_request_s { __s32 readahead_size; }; -struct pvfs2_iox_request_s { - __s32 buf_index; - __s32 count; - struct pvfs2_object_kref refn; - enum PVFS_io_type io_type; - __s32 __pad1; -}; - struct pvfs2_lookup_request_s { __s32 sym_follow; __s32 __pad1; @@ -218,13 +210,12 @@ struct pvfs2_upcall_s { __u32 gid; int pid; int tgid; - /* currently trailer is used only by readx/writex (iox) */ + /* Trailers unused but must be retained for protocol compatibility. */ __s64 trailer_size; char *trailer_buf; union { struct pvfs2_io_request_s io; - struct pvfs2_iox_request_s iox; struct pvfs2_lookup_request_s lookup; struct pvfs2_create_request_s create; struct pvfs2_symlink_request_s sym; From 24c8d0804be00da90af9efa8eb404bd7a3284ba9 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Fri, 13 Nov 2015 14:26:10 -0500 Subject: [PATCH 048/174] Orangefs: Clean up pvfs2_devreq_read. * Kick invalid arguments out early, so handling them does not clutter the code. * Avoid possibility of race by not releasing lock until completely done. * Do not leak ops (memory) in certain error condition. * Check for more error conditions. * Put module name in all error and debug logs. * Document behavior. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/devpvfs2-req.c | 213 +++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 93 deletions(-) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index 34e2240f1d29..e37b6479a6a1 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -104,110 +104,137 @@ static ssize_t pvfs2_devreq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - int ret = 0; - ssize_t len = 0; - struct pvfs2_kernel_op_s *cur_op = NULL; - static __s32 magic = PVFS2_DEVREQ_MAGIC; + struct pvfs2_kernel_op_s *op, *temp; __s32 proto_ver = PVFS_KERNEL_PROTO_VERSION; + static __s32 magic = PVFS2_DEVREQ_MAGIC; + struct pvfs2_kernel_op_s *cur_op = NULL; + unsigned long ret; + /* We do not support blocking IO. */ if (!(file->f_flags & O_NONBLOCK)) { - /* We do not support blocking reads/opens any more */ - gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n"); + gossip_err("orangefs: blocking reads are not supported! (pvfs2-client-core bug)\n"); return -EINVAL; - } else { - struct pvfs2_kernel_op_s *op = NULL, *temp = NULL; - /* get next op (if any) from top of list */ - spin_lock(&pvfs2_request_list_lock); - list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) { - __s32 fsid = fsid_of_op(op); - /* - * Check if this op's fsid is known and needs - * remounting - */ - if (fsid != PVFS_FS_ID_NULL && - fs_mount_pending(fsid) == 1) { + } + + /* + * The client will do an ioctl to find MAX_ALIGNED_DEV_REQ_UPSIZE, then + * always read with that size buffer. + */ + if (count != MAX_ALIGNED_DEV_REQ_UPSIZE) { + gossip_err("orangefs: client-core tried to read wrong size\n"); + return -EINVAL; + } + + /* Get next op (if any) from top of list. */ + spin_lock(&pvfs2_request_list_lock); + list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) { + __s32 fsid; + /* This lock is held past the end of the loop when we break. 
*/ + spin_lock(&op->lock); + + fsid = fsid_of_op(op); + if (fsid != PVFS_FS_ID_NULL) { + int ret; + /* Skip ops whose filesystem needs to be mounted. */ + ret = fs_mount_pending(fsid); + if (ret == 1) { gossip_debug(GOSSIP_DEV_DEBUG, - "Skipping op tag %llu %s\n", - llu(op->tag), - get_opname_string(op)); + "orangefs: skipping op tag %llu %s\n", + llu(op->tag), get_opname_string(op)); + spin_unlock(&op->lock); + continue; + /* Skip ops whose filesystem we don't know about unless + * it is being mounted. */ + /* XXX: is there a better way to detect this? */ + } else if (ret == -1 && + !(op->upcall.type == PVFS2_VFS_OP_FS_MOUNT || + op->upcall.type == PVFS2_VFS_OP_GETATTR)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "orangefs: skipping op tag %llu %s\n", + llu(op->tag), get_opname_string(op)); + gossip_err( + "orangefs: ERROR: fs_mount_pending %d\n", + fsid); + spin_unlock(&op->lock); continue; - } else { - /* - * op does not belong to any particular fsid - * or already mounted.. let it through - */ - cur_op = op; - spin_lock(&cur_op->lock); - list_del(&cur_op->list); - spin_unlock(&cur_op->lock); - break; } } - spin_unlock(&pvfs2_request_list_lock); - } - - if (cur_op) { - spin_lock(&cur_op->lock); - - gossip_debug(GOSSIP_DEV_DEBUG, - "client-core: reading op tag %llu %s\n", - llu(cur_op->tag), get_opname_string(cur_op)); - if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { - gossip_err("WARNING: Current op already queued...skipping\n"); - } else { - /* - * atomically move the operation to the - * htable_ops_in_progress - */ - set_op_state_inprogress(cur_op); - pvfs2_devreq_add_op(cur_op); - } - - spin_unlock(&cur_op->lock); - - /* Push the upcall out */ - len = MAX_ALIGNED_DEV_REQ_UPSIZE; - if ((size_t) len <= count) { - ret = copy_to_user(buf, - &proto_ver, - sizeof(__s32)); - if (ret == 0) { - ret = copy_to_user(buf + sizeof(__s32), - &magic, - sizeof(__s32)); - if (ret == 0) { - ret = copy_to_user(buf+2 * sizeof(__s32), - &cur_op->tag, - sizeof(__u64)); 
- if (ret == 0) { - ret = copy_to_user( - buf + - 2 * - sizeof(__s32) + - sizeof(__u64), - &cur_op->upcall, - sizeof(struct pvfs2_upcall_s)); - } - } - } - - if (ret) { - gossip_err("Failed to copy data to user space\n"); - len = -EFAULT; - } - } else { - gossip_err - ("Failed to copy data to user space\n"); - len = -EIO; - } - } else if (file->f_flags & O_NONBLOCK) { /* - * if in non-blocking mode, return EAGAIN since no requests are - * ready yet + * Either this op does not pertain to a filesystem, is mounting + * a filesystem, or pertains to a mounted filesystem. Let it + * through. */ - len = -EAGAIN; + cur_op = op; + break; } - return len; + + /* + * At this point we either have a valid op and can continue or have not + * found an op and must ask the client to try again later. + */ + if (!cur_op) { + spin_unlock(&pvfs2_request_list_lock); + return -EAGAIN; + } + + gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n", + llu(cur_op->tag), get_opname_string(cur_op)); + + /* + * Such an op should never be on the list in the first place. If so, we + * will abort. + */ + if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { + gossip_err("orangefs: ERROR: Current op already queued.\n"); + list_del(&cur_op->list); + spin_unlock(&cur_op->lock); + spin_unlock(&pvfs2_request_list_lock); + return -EAGAIN; + } + + /* + * Set the operation to be in progress and move it between lists since + * it has been sent to the client. + */ + set_op_state_inprogress(cur_op); + + list_del(&cur_op->list); + spin_unlock(&pvfs2_request_list_lock); + pvfs2_devreq_add_op(cur_op); + spin_unlock(&cur_op->lock); + + /* Push the upcall out. 
*/ + ret = copy_to_user(buf, &proto_ver, sizeof(__s32)); + if (ret != 0) + goto error; + ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32)); + if (ret != 0) + goto error; + ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64)); + if (ret != 0) + goto error; + ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall, + sizeof(struct pvfs2_upcall_s)); + if (ret != 0) + goto error; + + /* The client only asks to read one size buffer. */ + return MAX_ALIGNED_DEV_REQ_UPSIZE; +error: + /* + * We were unable to copy the op data to the client. Put the op back in + * list. If client has crashed, the op will be purged later when the + * device is released. + */ + gossip_err("orangefs: Failed to copy data to user space\n"); + spin_lock(&pvfs2_request_list_lock); + spin_lock(&cur_op->lock); + set_op_state_waiting(cur_op); + pvfs2_devreq_remove_op(cur_op->tag); + list_add(&cur_op->list, &pvfs2_request_list); + spin_unlock(&cur_op->lock); + spin_unlock(&pvfs2_request_list_lock); + return -EFAULT; } /* Function for writev() callers into the device */ From 555fa0fa618b846c5b38406347b7d53ace320ac6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Nov 2015 16:33:39 +0000 Subject: [PATCH 049/174] fs: out of bounds on stack in iov_iter_advance On Wed, Nov 11, 2015 at 10:19:48AM +0000, Al Viro wrote: > I'll cook the minimal fixup for API change after I get some sleep and > send it your way, unless somebody gets there first... This should do it - switches ->ioctl() to pvfs2_inode_[gs]etxattr() and converts xattr_handler ->[gs]et() to new API. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 19 +++++++--------- fs/orangefs/pvfs2-kernel.h | 13 ----------- fs/orangefs/xattr.c | 44 +++++++++++++++++++------------------- 3 files changed, 30 insertions(+), 46 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 78a46968a994..3a8140f289f6 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -506,11 +506,10 @@ static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) */ if (cmd == FS_IOC_GETFLAGS) { val = 0; - ret = pvfs2_xattr_get_default(file->f_path.dentry, - "user.pvfs2.meta_hint", - &val, - sizeof(val), - 0); + ret = pvfs2_inode_getxattr(file_inode(file), + PVFS2_XATTR_NAME_DEFAULT_PREFIX, + "user.pvfs2.meta_hint", + &val, sizeof(val)); if (ret < 0 && ret != -ENODATA) return ret; else if (ret == -ENODATA) @@ -540,12 +539,10 @@ static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_ioctl: FS_IOC_SETFLAGS: %llu\n", (unsigned long long)val); - ret = pvfs2_xattr_set_default(file->f_path.dentry, - "user.pvfs2.meta_hint", - &val, - sizeof(val), - 0, - 0); + ret = pvfs2_inode_setxattr(file_inode(file), + PVFS2_XATTR_NAME_DEFAULT_PREFIX, + "user.pvfs2.meta_hint", + &val, sizeof(val), 0); } return ret; diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index ac90b6365fd3..4295e263e25b 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -234,19 +234,6 @@ extern const struct xattr_handler *pvfs2_xattr_handlers[]; extern struct posix_acl *pvfs2_get_acl(struct inode *inode, int type); extern int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); -int pvfs2_xattr_set_default(struct dentry *dentry, - const char *name, - const void *buffer, - size_t size, - int flags, - int handler_flags); - -int pvfs2_xattr_get_default(struct dentry *dentry, - const char *name, - void *buffer, - size_t size, - int handler_flags); - /* * 
Redefine xtvec structure so that we could move helper functions out of * the define diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 227eaa47b1e1..b683daab7425 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -447,12 +447,12 @@ out_unlock: return ret; } -int pvfs2_xattr_set_default(struct dentry *dentry, - const char *name, - const void *buffer, - size_t size, - int flags, - int handler_flags) +static int pvfs2_xattr_set_default(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags) { return pvfs2_inode_setxattr(dentry->d_inode, PVFS2_XATTR_NAME_DEFAULT_PREFIX, @@ -462,11 +462,11 @@ int pvfs2_xattr_set_default(struct dentry *dentry, flags); } -int pvfs2_xattr_get_default(struct dentry *dentry, - const char *name, - void *buffer, - size_t size, - int handler_flags) +static int pvfs2_xattr_get_default(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + void *buffer, + size_t size) { return pvfs2_inode_getxattr(dentry->d_inode, PVFS2_XATTR_NAME_DEFAULT_PREFIX, @@ -476,12 +476,12 @@ int pvfs2_xattr_get_default(struct dentry *dentry, } -static int pvfs2_xattr_set_trusted(struct dentry *dentry, - const char *name, - const void *buffer, - size_t size, - int flags, - int handler_flags) +static int pvfs2_xattr_set_trusted(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags) { return pvfs2_inode_setxattr(dentry->d_inode, PVFS2_XATTR_NAME_TRUSTED_PREFIX, @@ -491,11 +491,11 @@ static int pvfs2_xattr_set_trusted(struct dentry *dentry, flags); } -static int pvfs2_xattr_get_trusted(struct dentry *dentry, - const char *name, - void *buffer, - size_t size, - int handler_flags) +static int pvfs2_xattr_get_trusted(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + void *buffer, + size_t size) { return pvfs2_inode_getxattr(dentry->d_inode, 
PVFS2_XATTR_NAME_TRUSTED_PREFIX, From 8bb8aefd5afb54a25a002feb4ec70011812d06a0 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 24 Nov 2015 15:12:14 -0500 Subject: [PATCH 050/174] OrangeFS: Change almost all instances of the string PVFS2 to OrangeFS. OrangeFS was formerly known as PVFS2 and retains the name in many places. I leave the device /dev/pvfs2-req since this affects userspace. I leave the filesystem type pvfs2 since this affects userspace. Further the OrangeFS sysint library reads fstab for an entry of type pvfs2 independently of kernel mounts. I leave extended attribute keys user.pvfs2 and system.pvfs2 as the sysint library understands these. I leave references to userspace binaries still named pvfs2. I leave the filenames. Signed-off-by: Yi Liu [martin@omnibond.com: clarify above constraints and merge] Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/acl.c | 42 ++-- fs/orangefs/dcache.c | 24 +- fs/orangefs/devpvfs2-req.c | 256 +++++++++++----------- fs/orangefs/dir.c | 86 ++++---- fs/orangefs/downcall.h | 84 +++---- fs/orangefs/file.c | 208 +++++++++--------- fs/orangefs/inode.c | 176 +++++++-------- fs/orangefs/namei.c | 134 ++++++------ fs/orangefs/protocol.h | 302 ++++++++++++------------- fs/orangefs/pvfs2-bufmap.c | 168 +++++++------- fs/orangefs/pvfs2-bufmap.h | 40 ++-- fs/orangefs/pvfs2-cache.c | 122 +++++------ fs/orangefs/pvfs2-debug.h | 18 +- fs/orangefs/pvfs2-debugfs.c | 48 ++-- fs/orangefs/pvfs2-debugfs.h | 6 +- fs/orangefs/pvfs2-dev-proto.h | 68 +++--- fs/orangefs/pvfs2-kernel.h | 402 +++++++++++++++++----------------- fs/orangefs/pvfs2-mod.c | 106 ++++----- fs/orangefs/pvfs2-sysfs.c | 108 ++++----- fs/orangefs/pvfs2-utils.c | 336 ++++++++++++++-------------- fs/orangefs/super.c | 280 +++++++++++------------ fs/orangefs/symlink.c | 14 +- fs/orangefs/upcall.h | 250 ++++++++++----------- fs/orangefs/waitqueue.c | 70 +++--- fs/orangefs/xattr.c | 204 ++++++++--------- 25 files changed, 1776 insertions(+), 1776
deletions(-) diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index e462b81a3ba1..5e27d5fcb6bf 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -10,7 +10,7 @@ #include #include -struct posix_acl *pvfs2_get_acl(struct inode *inode, int type) +struct posix_acl *orangefs_get_acl(struct inode *inode, int type) { struct posix_acl *acl; int ret; @@ -18,23 +18,23 @@ struct posix_acl *pvfs2_get_acl(struct inode *inode, int type) switch (type) { case ACL_TYPE_ACCESS: - key = PVFS2_XATTR_NAME_ACL_ACCESS; + key = ORANGEFS_XATTR_NAME_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - key = PVFS2_XATTR_NAME_ACL_DEFAULT; + key = ORANGEFS_XATTR_NAME_ACL_DEFAULT; break; default: - gossip_err("pvfs2_get_acl: bogus value of type %d\n", type); + gossip_err("orangefs_get_acl: bogus value of type %d\n", type); return ERR_PTR(-EINVAL); } /* * Rather than incurring a network call just to determine the exact * length of the attribute, I just allocate a max length to save on * the network call. Conceivably, we could pass NULL to - * pvfs2_inode_getxattr() to probe the length of the value, but + * orangefs_inode_getxattr() to probe the length of the value, but * I don't do that for now. 
*/ - value = kmalloc(PVFS_MAX_XATTR_VALUELEN, GFP_KERNEL); + value = kmalloc(ORANGEFS_MAX_XATTR_VALUELEN, GFP_KERNEL); if (value == NULL) return ERR_PTR(-ENOMEM); @@ -43,11 +43,11 @@ struct posix_acl *pvfs2_get_acl(struct inode *inode, int type) get_khandle_from_ino(inode), key, type); - ret = pvfs2_inode_getxattr(inode, + ret = orangefs_inode_getxattr(inode, "", key, value, - PVFS_MAX_XATTR_VALUELEN); + ORANGEFS_MAX_XATTR_VALUELEN); /* if the key exists, convert it to an in-memory rep */ if (ret > 0) { acl = posix_acl_from_xattr(&init_user_ns, value, ret); @@ -64,9 +64,9 @@ struct posix_acl *pvfs2_get_acl(struct inode *inode, int type) return acl; } -int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); int error = 0; void *value = NULL; size_t size = 0; @@ -74,7 +74,7 @@ int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch (type) { case ACL_TYPE_ACCESS: - name = PVFS2_XATTR_NAME_ACL_ACCESS; + name = ORANGEFS_XATTR_NAME_ACL_ACCESS; if (acl) { umode_t mode = inode->i_mode; /* @@ -90,7 +90,7 @@ int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) } if (inode->i_mode != mode) - SetModeFlag(pvfs2_inode); + SetModeFlag(orangefs_inode); inode->i_mode = mode; mark_inode_dirty_sync(inode); if (error == 0) @@ -98,7 +98,7 @@ int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) } break; case ACL_TYPE_DEFAULT: - name = PVFS2_XATTR_NAME_ACL_DEFAULT; + name = ORANGEFS_XATTR_NAME_ACL_DEFAULT; break; default: gossip_err("%s: invalid type %d!\n", __func__, type); @@ -131,7 +131,7 @@ int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) * will xlate to a removexattr. However, we don't want removexattr * complain if attributes does not exist. 
*/ - error = pvfs2_inode_setxattr(inode, "", name, value, size, 0); + error = orangefs_inode_setxattr(inode, "", name, value, size, 0); out: kfree(value); @@ -140,35 +140,35 @@ out: return error; } -int pvfs2_init_acl(struct inode *inode, struct inode *dir) +int orangefs_init_acl(struct inode *inode, struct inode *dir) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct posix_acl *default_acl, *acl; umode_t mode = inode->i_mode; int error = 0; - ClearModeFlag(pvfs2_inode); + ClearModeFlag(orangefs_inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) return error; if (default_acl) { - error = pvfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = pvfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } /* If mode of the inode was changed, then do a forcible ->setattr */ if (mode != inode->i_mode) { - SetModeFlag(pvfs2_inode); + SetModeFlag(orangefs_inode); inode->i_mode = mode; - pvfs2_flush_inode(inode); + orangefs_flush_inode(inode); } return error; diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 9466b179bf24..12c916fa4c7f 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -12,27 +12,27 @@ #include "pvfs2-kernel.h" /* Returns 1 if dentry can still be trusted, else 0. 
*/ -static int pvfs2_revalidate_lookup(struct dentry *dentry) +static int orangefs_revalidate_lookup(struct dentry *dentry) { struct dentry *parent_dentry = dget_parent(dentry); struct inode *parent_inode = parent_dentry->d_inode; - struct pvfs2_inode_s *parent = PVFS2_I(parent_inode); + struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode); struct inode *inode = dentry->d_inode; - struct pvfs2_kernel_op_s *new_op; + struct orangefs_kernel_op_s *new_op; int ret = 0; int err = 0; gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); - new_op = op_alloc(PVFS2_VFS_OP_LOOKUP); + new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); if (!new_op) goto out_put_parent; - new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW; + new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - PVFS2_NAME_LEN); + ORANGEFS_NAME_LEN); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", @@ -41,7 +41,7 @@ static int pvfs2_revalidate_lookup(struct dentry *dentry) __LINE__, get_interruptible_flag(parent_inode)); - err = service_operation(new_op, "pvfs2_lookup", + err = service_operation(new_op, "orangefs_lookup", get_interruptible_flag(parent_inode)); if (err) goto out_drop; @@ -79,7 +79,7 @@ out_drop: * * Should return 1 if dentry can still be trusted, else 0 */ -static int pvfs2_d_revalidate(struct dentry *dentry, unsigned int flags) +static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; @@ -105,7 +105,7 @@ static int pvfs2_d_revalidate(struct dentry *dentry, unsigned int flags) * exists, but is still in the expected place in the name space */ if (!is_root_handle(inode)) { - if (!pvfs2_revalidate_lookup(dentry)) + if (!orangefs_revalidate_lookup(dentry)) goto invalid_exit; } else { gossip_debug(GOSSIP_DCACHE_DEBUG, @@ -119,7 +119,7 @@ static int 
pvfs2_d_revalidate(struct dentry *dentry, unsigned int flags) __func__, inode, get_khandle_from_ino(inode)); - ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: getattr %s (ret = %d), returning %s for dentry i_count=%d\n", __func__, @@ -137,6 +137,6 @@ invalid_exit: return 0; } -const struct dentry_operations pvfs2_dentry_operations = { - .d_revalidate = pvfs2_d_revalidate, +const struct dentry_operations orangefs_dentry_operations = { + .d_revalidate = orangefs_d_revalidate, }; diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c index e37b6479a6a1..e18149f0975b 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devpvfs2-req.c @@ -22,14 +22,14 @@ static int open_access_count; #define DUMP_DEVICE_ERROR() \ do { \ gossip_err("*****************************************************\n");\ - gossip_err("PVFS2 Device Error: You cannot open the device file "); \ + gossip_err("ORANGEFS Device Error: You cannot open the device file "); \ gossip_err("\n/dev/%s more than once. Please make sure that\nthere " \ - "are no ", PVFS2_REQDEVICE_NAME); \ + "are no ", ORANGEFS_REQDEVICE_NAME); \ gossip_err("instances of a program using this device\ncurrently " \ "running. 
(You must verify this!)\n"); \ gossip_err("For example, you can use the lsof program as follows:\n");\ gossip_err("'lsof | grep %s' (run this as root)\n", \ - PVFS2_REQDEVICE_NAME); \ + ORANGEFS_REQDEVICE_NAME); \ gossip_err(" open_access_count = %d\n", open_access_count); \ gossip_err("*****************************************************\n");\ } while (0) @@ -39,7 +39,7 @@ static int hash_func(__u64 tag, int table_size) return do_div(tag, (unsigned int)table_size); } -static void pvfs2_devreq_add_op(struct pvfs2_kernel_op_s *op) +static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op) { int index = hash_func(op->tag, hash_table_size); @@ -48,9 +48,9 @@ static void pvfs2_devreq_add_op(struct pvfs2_kernel_op_s *op) spin_unlock(&htable_ops_in_progress_lock); } -static struct pvfs2_kernel_op_s *pvfs2_devreq_remove_op(__u64 tag) +static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) { - struct pvfs2_kernel_op_s *op, *next; + struct orangefs_kernel_op_s *op, *next; int index; index = hash_func(tag, hash_table_size); @@ -71,12 +71,12 @@ static struct pvfs2_kernel_op_s *pvfs2_devreq_remove_op(__u64 tag) return NULL; } -static int pvfs2_devreq_open(struct inode *inode, struct file *file) +static int orangefs_devreq_open(struct inode *inode, struct file *file) { int ret = -EINVAL; if (!(file->f_flags & O_NONBLOCK)) { - gossip_err("pvfs2: device cannot be opened in blocking mode\n"); + gossip_err("orangefs: device cannot be opened in blocking mode\n"); goto out; } ret = -EACCES; @@ -100,14 +100,14 @@ out: return ret; } -static ssize_t pvfs2_devreq_read(struct file *file, +static ssize_t orangefs_devreq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - struct pvfs2_kernel_op_s *op, *temp; - __s32 proto_ver = PVFS_KERNEL_PROTO_VERSION; - static __s32 magic = PVFS2_DEVREQ_MAGIC; - struct pvfs2_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *op, *temp; + __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; + static 
__s32 magic = ORANGEFS_DEVREQ_MAGIC; + struct orangefs_kernel_op_s *cur_op = NULL; unsigned long ret; /* We do not support blocking IO. */ @@ -126,14 +126,14 @@ static ssize_t pvfs2_devreq_read(struct file *file, } /* Get next op (if any) from top of list. */ - spin_lock(&pvfs2_request_list_lock); - list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) { + spin_lock(&orangefs_request_list_lock); + list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { __s32 fsid; /* This lock is held past the end of the loop when we break. */ spin_lock(&op->lock); fsid = fsid_of_op(op); - if (fsid != PVFS_FS_ID_NULL) { + if (fsid != ORANGEFS_FS_ID_NULL) { int ret; /* Skip ops whose filesystem needs to be mounted. */ ret = fs_mount_pending(fsid); @@ -147,8 +147,8 @@ static ssize_t pvfs2_devreq_read(struct file *file, * it is being mounted. */ /* XXX: is there a better way to detect this? */ } else if (ret == -1 && - !(op->upcall.type == PVFS2_VFS_OP_FS_MOUNT || - op->upcall.type == PVFS2_VFS_OP_GETATTR)) { + !(op->upcall.type == ORANGEFS_VFS_OP_FS_MOUNT || + op->upcall.type == ORANGEFS_VFS_OP_GETATTR)) { gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: skipping op tag %llu %s\n", llu(op->tag), get_opname_string(op)); @@ -173,7 +173,7 @@ static ssize_t pvfs2_devreq_read(struct file *file, * found an op and must ask the client to try again later. 
*/ if (!cur_op) { - spin_unlock(&pvfs2_request_list_lock); + spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } @@ -188,7 +188,7 @@ static ssize_t pvfs2_devreq_read(struct file *file, gossip_err("orangefs: ERROR: Current op already queued.\n"); list_del(&cur_op->list); spin_unlock(&cur_op->lock); - spin_unlock(&pvfs2_request_list_lock); + spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } @@ -199,8 +199,8 @@ static ssize_t pvfs2_devreq_read(struct file *file, set_op_state_inprogress(cur_op); list_del(&cur_op->list); - spin_unlock(&pvfs2_request_list_lock); - pvfs2_devreq_add_op(cur_op); + spin_unlock(&orangefs_request_list_lock); + orangefs_devreq_add_op(cur_op); spin_unlock(&cur_op->lock); /* Push the upcall out. */ @@ -214,7 +214,7 @@ static ssize_t pvfs2_devreq_read(struct file *file, if (ret != 0) goto error; ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall, - sizeof(struct pvfs2_upcall_s)); + sizeof(struct orangefs_upcall_s)); if (ret != 0) goto error; @@ -227,23 +227,23 @@ error: * device is released. 
*/ gossip_err("orangefs: Failed to copy data to user space\n"); - spin_lock(&pvfs2_request_list_lock); + spin_lock(&orangefs_request_list_lock); spin_lock(&cur_op->lock); set_op_state_waiting(cur_op); - pvfs2_devreq_remove_op(cur_op->tag); - list_add(&cur_op->list, &pvfs2_request_list); + orangefs_devreq_remove_op(cur_op->tag); + list_add(&cur_op->list, &orangefs_request_list); spin_unlock(&cur_op->lock); - spin_unlock(&pvfs2_request_list_lock); + spin_unlock(&orangefs_request_list_lock); return -EFAULT; } /* Function for writev() callers into the device */ -static ssize_t pvfs2_devreq_writev(struct file *file, +static ssize_t orangefs_devreq_writev(struct file *file, const struct iovec *iov, size_t count, loff_t *offset) { - struct pvfs2_kernel_op_s *op = NULL; + struct orangefs_kernel_op_s *op = NULL; void *buffer = NULL; void *ptr = NULL; unsigned long i = 0; @@ -301,7 +301,7 @@ static ssize_t pvfs2_devreq_writev(struct file *file, tag = *((__u64 *) ptr); ptr += sizeof(__u64); - if (magic != PVFS2_DEVREQ_MAGIC) { + if (magic != ORANGEFS_DEVREQ_MAGIC) { gossip_err("Error: Device magic number does not match.\n"); dev_req_release(buffer); return -EPROTO; @@ -311,17 +311,17 @@ static ssize_t pvfs2_devreq_writev(struct file *file, * proto_ver = 20902 for 2.9.2 */ - op = pvfs2_devreq_remove_op(tag); + op = orangefs_devreq_remove_op(tag); if (op) { /* Increase ref count! */ get_op(op); /* cut off magic and tag from payload size */ payload_size -= (2 * sizeof(__s32) + sizeof(__u64)); - if (payload_size <= sizeof(struct pvfs2_downcall_s)) + if (payload_size <= sizeof(struct orangefs_downcall_s)) /* copy the passed in downcall into the op */ memcpy(&op->downcall, ptr, - sizeof(struct pvfs2_downcall_s)); + sizeof(struct orangefs_downcall_s)); else gossip_debug(GOSSIP_DEV_DEBUG, "writev: Ignoring %d bytes\n", @@ -392,8 +392,8 @@ static ssize_t pvfs2_devreq_writev(struct file *file, * application reading/writing this device to return until * the buffers are done being used. 
*/ - if (op->upcall.type == PVFS2_VFS_OP_FILE_IO && - op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) { + if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO && + op->upcall.req.io.async_vfs_io == ORANGEFS_VFS_SYNC_IO) { int timed_out = 0; DECLARE_WAITQUEUE(wait_entry, current); @@ -473,10 +473,10 @@ static ssize_t pvfs2_devreq_writev(struct file *file, return total_returned_size; } -static ssize_t pvfs2_devreq_write_iter(struct kiocb *iocb, +static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, struct iov_iter *iter) { - return pvfs2_devreq_writev(iocb->ki_filp, + return orangefs_devreq_writev(iocb->ki_filp, iter->iov, iter->nr_segs, &iocb->ki_pos); @@ -486,15 +486,15 @@ static ssize_t pvfs2_devreq_write_iter(struct kiocb *iocb, static int mark_all_pending_mounts(void) { int unmounted = 1; - struct pvfs2_sb_info_s *pvfs2_sb = NULL; + struct orangefs_sb_info_s *orangefs_sb = NULL; - spin_lock(&pvfs2_superblocks_lock); - list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) { + spin_lock(&orangefs_superblocks_lock); + list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { /* All of these file system require a remount */ - pvfs2_sb->mount_pending = 1; + orangefs_sb->mount_pending = 1; unmounted = 0; } - spin_unlock(&pvfs2_superblocks_lock); + spin_unlock(&orangefs_superblocks_lock); return unmounted; } @@ -507,16 +507,16 @@ static int mark_all_pending_mounts(void) int fs_mount_pending(__s32 fsid) { int mount_pending = -1; - struct pvfs2_sb_info_s *pvfs2_sb = NULL; + struct orangefs_sb_info_s *orangefs_sb = NULL; - spin_lock(&pvfs2_superblocks_lock); - list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) { - if (pvfs2_sb->fs_id == fsid) { - mount_pending = pvfs2_sb->mount_pending; + spin_lock(&orangefs_superblocks_lock); + list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { + if (orangefs_sb->fs_id == fsid) { + mount_pending = orangefs_sb->mount_pending; break; } } - spin_unlock(&pvfs2_superblocks_lock); + 
spin_unlock(&orangefs_superblocks_lock); return mount_pending; } @@ -525,10 +525,10 @@ int fs_mount_pending(__s32 fsid) * Using the open_access_count variable, we enforce a reference count * on this file so that it can be opened by only one process at a time. * the devreq_mutex is used to make sure all i/o has completed - * before we call pvfs_bufmap_finalize, and similar such tricky + * before we call orangefs_bufmap_finalize, and similar such tricky * situations */ -static int pvfs2_devreq_release(struct inode *inode, struct file *file) +static int orangefs_devreq_release(struct inode *inode, struct file *file) { int unmounted = 0; @@ -537,12 +537,12 @@ static int pvfs2_devreq_release(struct inode *inode, struct file *file) __func__); mutex_lock(&devreq_mutex); - pvfs_bufmap_finalize(); + orangefs_bufmap_finalize(); open_access_count--; unmounted = mark_all_pending_mounts(); - gossip_debug(GOSSIP_DEV_DEBUG, "PVFS2 Device Close: Filesystem(s) %s\n", + gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n", (unmounted ? "UNMOUNTED" : "MOUNTED")); mutex_unlock(&devreq_mutex); @@ -578,17 +578,17 @@ int is_daemon_in_service(void) static inline long check_ioctl_command(unsigned int command) { /* Check for valid ioctl codes */ - if (_IOC_TYPE(command) != PVFS_DEV_MAGIC) { + if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) { gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? 
[cmd %x, magic %x != %x]\n", command, _IOC_TYPE(command), - PVFS_DEV_MAGIC); + ORANGEFS_DEV_MAGIC); return -EINVAL; } /* and valid ioctl commands */ - if (_IOC_NR(command) >= PVFS_DEV_MAXNR || _IOC_NR(command) <= 0) { + if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) { gossip_err("Invalid ioctl command number [%d >= %d]\n", - _IOC_NR(command), PVFS_DEV_MAXNR); + _IOC_NR(command), ORANGEFS_DEV_MAXNR); return -ENOIOCTLCMD; } return 0; @@ -596,46 +596,46 @@ static inline long check_ioctl_command(unsigned int command) static long dispatch_ioctl_command(unsigned int command, unsigned long arg) { - static __s32 magic = PVFS2_DEVREQ_MAGIC; + static __s32 magic = ORANGEFS_DEVREQ_MAGIC; static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE; static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE; - struct PVFS_dev_map_desc user_desc; + struct ORANGEFS_dev_map_desc user_desc; int ret = 0; struct dev_mask_info_s mask_info = { 0 }; struct dev_mask2_info_s mask2_info = { 0, 0 }; int upstream_kmod = 1; struct list_head *tmp = NULL; - struct pvfs2_sb_info_s *pvfs2_sb = NULL; + struct orangefs_sb_info_s *orangefs_sb = NULL; /* mtmoore: add locking here */ switch (command) { - case PVFS_DEV_GET_MAGIC: + case ORANGEFS_DEV_GET_MAGIC: return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ? -EIO : 0); - case PVFS_DEV_GET_MAX_UPSIZE: + case ORANGEFS_DEV_GET_MAX_UPSIZE: return ((put_user(max_up_size, (__s32 __user *) arg) == -EFAULT) ? -EIO : 0); - case PVFS_DEV_GET_MAX_DOWNSIZE: + case ORANGEFS_DEV_GET_MAX_DOWNSIZE: return ((put_user(max_down_size, (__s32 __user *) arg) == -EFAULT) ? -EIO : 0); - case PVFS_DEV_MAP: + case ORANGEFS_DEV_MAP: ret = copy_from_user(&user_desc, - (struct PVFS_dev_map_desc __user *) + (struct ORANGEFS_dev_map_desc __user *) arg, - sizeof(struct PVFS_dev_map_desc)); - return ret ? -EIO : pvfs_bufmap_initialize(&user_desc); - case PVFS_DEV_REMOUNT_ALL: + sizeof(struct ORANGEFS_dev_map_desc)); + return ret ? 
-EIO : orangefs_bufmap_initialize(&user_desc); + case ORANGEFS_DEV_REMOUNT_ALL: gossip_debug(GOSSIP_DEV_DEBUG, - "pvfs2_devreq_ioctl: got PVFS_DEV_REMOUNT_ALL\n"); + "orangefs_devreq_ioctl: got ORANGEFS_DEV_REMOUNT_ALL\n"); /* - * remount all mounted pvfs2 volumes to regain the lost + * remount all mounted orangefs volumes to regain the lost * dynamic mount tables (if any) -- NOTE: this is done * without keeping the superblock list locked due to the * upcall/downcall waiting. also, the request semaphore is @@ -647,30 +647,30 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) if (ret < 0) return ret; gossip_debug(GOSSIP_DEV_DEBUG, - "pvfs2_devreq_ioctl: priority remount in progress\n"); - list_for_each(tmp, &pvfs2_superblocks) { - pvfs2_sb = - list_entry(tmp, struct pvfs2_sb_info_s, list); - if (pvfs2_sb && (pvfs2_sb->sb)) { + "orangefs_devreq_ioctl: priority remount in progress\n"); + list_for_each(tmp, &orangefs_superblocks) { + orangefs_sb = + list_entry(tmp, struct orangefs_sb_info_s, list); + if (orangefs_sb && (orangefs_sb->sb)) { gossip_debug(GOSSIP_DEV_DEBUG, "Remounting SB %p\n", - pvfs2_sb); + orangefs_sb); - ret = pvfs2_remount(pvfs2_sb->sb); + ret = orangefs_remount(orangefs_sb->sb); if (ret) { gossip_debug(GOSSIP_DEV_DEBUG, "SB %p remount failed\n", - pvfs2_sb); + orangefs_sb); break; } } } gossip_debug(GOSSIP_DEV_DEBUG, - "pvfs2_devreq_ioctl: priority remount complete\n"); + "orangefs_devreq_ioctl: priority remount complete\n"); mutex_unlock(&request_mutex); return ret; - case PVFS_DEV_UPSTREAM: + case ORANGEFS_DEV_UPSTREAM: ret = copy_to_user((void __user *)arg, &upstream_kmod, sizeof(upstream_kmod)); @@ -680,7 +680,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) else return ret; - case PVFS_DEV_CLIENT_MASK: + case ORANGEFS_DEV_CLIENT_MASK: ret = copy_from_user(&mask2_info, (void __user *)arg, sizeof(struct dev_mask2_info_s)); @@ -699,13 +699,13 @@ static long dispatch_ioctl_command(unsigned 
int command, unsigned long arg) return ret; - case PVFS_DEV_CLIENT_STRING: + case ORANGEFS_DEV_CLIENT_STRING: ret = copy_from_user(&client_debug_array_string, (void __user *)arg, - PVFS2_MAX_DEBUG_STRING_LEN); + ORANGEFS_MAX_DEBUG_STRING_LEN); if (ret != 0) { pr_info("%s: " - "PVFS_DEV_CLIENT_STRING: copy_from_user failed" + "ORANGEFS_DEV_CLIENT_STRING: copy_from_user failed" "\n", __func__); return -EIO; @@ -753,13 +753,13 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) debugfs_remove(client_debug_dentry); - pvfs2_client_debug_init(); + orangefs_client_debug_init(); help_string_initialized++; return ret; - case PVFS_DEV_DEBUG: + case ORANGEFS_DEV_DEBUG: ret = copy_from_user(&mask_info, (void __user *)arg, sizeof(mask_info)); @@ -774,21 +774,21 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) * the kernel debug mask was set when the * kernel module was loaded; don't override * it if the client-core was started without - * a value for PVFS2_KMODMASK. + * a value for ORANGEFS_KMODMASK. 
*/ return 0; } debug_mask_to_string(&mask_info.mask_value, mask_info.mask_type); gossip_debug_mask = mask_info.mask_value; - pr_info("PVFS: kernel debug mask has been modified to " + pr_info("ORANGEFS: kernel debug mask has been modified to " ":%s: :%llx:\n", kernel_debug_string, (unsigned long long)gossip_debug_mask); } else if (mask_info.mask_type == CLIENT_MASK) { debug_mask_to_string(&mask_info.mask_value, mask_info.mask_type); - pr_info("PVFS: client debug mask has been modified to" + pr_info("ORANGEFS: client debug mask has been modified to" ":%s: :%llx:\n", client_debug_string, llu(mask_info.mask_value)); @@ -805,7 +805,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) return -ENOIOCTLCMD; } -static long pvfs2_devreq_ioctl(struct file *file, +static long orangefs_devreq_ioctl(struct file *file, unsigned int command, unsigned long arg) { long ret; @@ -820,8 +820,8 @@ static long pvfs2_devreq_ioctl(struct file *file, #ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ -/* Compat structure for the PVFS_DEV_MAP ioctl */ -struct PVFS_dev_map_desc32 { +/* Compat structure for the ORANGEFS_DEV_MAP ioctl */ +struct ORANGEFS_dev_map_desc32 { compat_uptr_t ptr; __s32 total_size; __s32 size; @@ -830,12 +830,12 @@ struct PVFS_dev_map_desc32 { static unsigned long translate_dev_map26(unsigned long args, long *error) { - struct PVFS_dev_map_desc32 __user *p32 = (void __user *)args; + struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args; /* * Depending on the architecture, allocate some space on the * user-call-stack based on our expected layout. 
*/ - struct PVFS_dev_map_desc __user *p = + struct ORANGEFS_dev_map_desc __user *p = compat_alloc_user_space(sizeof(*p)); compat_uptr_t addr; @@ -863,7 +863,7 @@ err: * 32 bit user-space apps' ioctl handlers when kernel modules * is compiled as a 64 bit one */ -static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd, +static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long args) { long ret; @@ -873,7 +873,7 @@ static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd, ret = check_ioctl_command(cmd); if (ret < 0) return ret; - if (cmd == PVFS_DEV_MAP) { + if (cmd == ORANGEFS_DEV_MAP) { /* * convert the arguments to what we expect internally * in kernel space @@ -896,89 +896,89 @@ static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd, * not noticed until we tried to compile on power pc... */ #if (defined(CONFIG_COMPAT) && !defined(HAVE_REGISTER_IOCTL32_CONVERSION)) || !defined(CONFIG_COMPAT) -static int pvfs2_ioctl32_init(void) +static int orangefs_ioctl32_init(void) { return 0; } -static void pvfs2_ioctl32_cleanup(void) +static void orangefs_ioctl32_cleanup(void) { return; } #endif /* the assigned character device major number */ -static int pvfs2_dev_major; +static int orangefs_dev_major; /* - * Initialize pvfs2 device specific state: + * Initialize orangefs device specific state: * Must be called at module load time only */ -int pvfs2_dev_init(void) +int orangefs_dev_init(void) { int ret; /* register the ioctl32 sub-system */ - ret = pvfs2_ioctl32_init(); + ret = orangefs_ioctl32_init(); if (ret < 0) return ret; - /* register pvfs2-req device */ - pvfs2_dev_major = register_chrdev(0, - PVFS2_REQDEVICE_NAME, - &pvfs2_devreq_file_operations); - if (pvfs2_dev_major < 0) { + /* register orangefs-req device */ + orangefs_dev_major = register_chrdev(0, + ORANGEFS_REQDEVICE_NAME, + &orangefs_devreq_file_operations); + if (orangefs_dev_major < 0) { gossip_debug(GOSSIP_DEV_DEBUG, 
"Failed to register /dev/%s (error %d)\n", - PVFS2_REQDEVICE_NAME, pvfs2_dev_major); - pvfs2_ioctl32_cleanup(); - return pvfs2_dev_major; + ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); + orangefs_ioctl32_cleanup(); + return orangefs_dev_major; } gossip_debug(GOSSIP_DEV_DEBUG, "*** /dev/%s character device registered ***\n", - PVFS2_REQDEVICE_NAME); + ORANGEFS_REQDEVICE_NAME); gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n", - PVFS2_REQDEVICE_NAME, pvfs2_dev_major); + ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); return 0; } -void pvfs2_dev_cleanup(void) +void orangefs_dev_cleanup(void) { - unregister_chrdev(pvfs2_dev_major, PVFS2_REQDEVICE_NAME); + unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME); gossip_debug(GOSSIP_DEV_DEBUG, "*** /dev/%s character device unregistered ***\n", - PVFS2_REQDEVICE_NAME); + ORANGEFS_REQDEVICE_NAME); /* unregister the ioctl32 sub-system */ - pvfs2_ioctl32_cleanup(); + orangefs_ioctl32_cleanup(); } -static unsigned int pvfs2_devreq_poll(struct file *file, +static unsigned int orangefs_devreq_poll(struct file *file, struct poll_table_struct *poll_table) { int poll_revent_mask = 0; if (open_access_count == 1) { - poll_wait(file, &pvfs2_request_list_waitq, poll_table); + poll_wait(file, &orangefs_request_list_waitq, poll_table); - spin_lock(&pvfs2_request_list_lock); - if (!list_empty(&pvfs2_request_list)) + spin_lock(&orangefs_request_list_lock); + if (!list_empty(&orangefs_request_list)) poll_revent_mask |= POLL_IN; - spin_unlock(&pvfs2_request_list_lock); + spin_unlock(&orangefs_request_list_lock); } return poll_revent_mask; } -const struct file_operations pvfs2_devreq_file_operations = { +const struct file_operations orangefs_devreq_file_operations = { .owner = THIS_MODULE, - .read = pvfs2_devreq_read, - .write_iter = pvfs2_devreq_write_iter, - .open = pvfs2_devreq_open, - .release = pvfs2_devreq_release, - .unlocked_ioctl = pvfs2_devreq_ioctl, + .read = orangefs_devreq_read, + .write_iter = 
orangefs_devreq_write_iter, + .open = orangefs_devreq_open, + .release = orangefs_devreq_release, + .unlocked_ioctl = orangefs_devreq_ioctl, #ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ - .compat_ioctl = pvfs2_devreq_compat_ioctl, + .compat_ioctl = orangefs_devreq_compat_ioctl, #endif - .poll = pvfs2_devreq_poll + .poll = orangefs_devreq_poll }; diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 3049cd61b700..452d589b9747 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -10,7 +10,7 @@ struct readdir_handle_s { int buffer_index; - struct pvfs2_readdir_response_s readdir_response; + struct orangefs_readdir_response_s readdir_response; void *dents_buf; }; @@ -18,28 +18,28 @@ struct readdir_handle_s { * decode routine needed by kmod to make sense of the shared page for readdirs. */ static long decode_dirents(char *ptr, size_t size, - struct pvfs2_readdir_response_s *readdir) + struct orangefs_readdir_response_s *readdir) { int i; - struct pvfs2_readdir_response_s *rd = - (struct pvfs2_readdir_response_s *) ptr; + struct orangefs_readdir_response_s *rd = + (struct orangefs_readdir_response_s *) ptr; char *buf = ptr; - if (size < offsetof(struct pvfs2_readdir_response_s, dirent_array)) + if (size < offsetof(struct orangefs_readdir_response_s, dirent_array)) return -EINVAL; readdir->token = rd->token; - readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; - readdir->dirent_array = kcalloc(readdir->pvfs_dirent_outcount, + readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount; + readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount, sizeof(*readdir->dirent_array), GFP_KERNEL); if (readdir->dirent_array == NULL) return -ENOMEM; - buf += offsetof(struct pvfs2_readdir_response_s, dirent_array); - size -= offsetof(struct pvfs2_readdir_response_s, dirent_array); + buf += offsetof(struct orangefs_readdir_response_s, dirent_array); + size -= offsetof(struct orangefs_readdir_response_s, dirent_array); - for (i = 0; i < 
readdir->pvfs_dirent_outcount; i++) { + for (i = 0; i < readdir->orangefs_dirent_outcount; i++) { __u32 len; if (size < 4) @@ -60,7 +60,7 @@ static long decode_dirents(char *ptr, size_t size, buf += len; readdir->dirent_array[i].khandle = - *(struct pvfs2_khandle *) buf; + *(struct orangefs_khandle *) buf; buf += 16; } return buf - ptr; @@ -98,7 +98,7 @@ static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, return ret; } -static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, +static void readdir_handle_dtor(struct orangefs_bufmap *bufmap, struct readdir_handle_s *rhandle) { if (rhandle == NULL) @@ -123,9 +123,9 @@ static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, /* * Read directory entries from an instance of an open directory. */ -static int pvfs2_readdir(struct file *file, struct dir_context *ctx) +static int orangefs_readdir(struct file *file, struct dir_context *ctx) { - struct pvfs2_bufmap *bufmap = NULL; + struct orangefs_bufmap *bufmap = NULL; int ret = 0; int buffer_index; /* @@ -136,8 +136,8 @@ static int pvfs2_readdir(struct file *file, struct dir_context *ctx) __u64 pos = 0; ino_t ino = 0; struct dentry *dentry = file->f_path.dentry; - struct pvfs2_kernel_op_s *new_op = NULL; - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode); + struct orangefs_kernel_op_s *new_op = NULL; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); int buffer_full = 0; struct readdir_handle_s rhandle; int i = 0; @@ -155,26 +155,26 @@ static int pvfs2_readdir(struct file *file, struct dir_context *ctx) pos = (__u64) ctx->pos; /* are we done? 
*/ - if (pos == PVFS_READDIR_END) { + if (pos == ORANGEFS_READDIR_END) { gossip_debug(GOSSIP_DIR_DEBUG, "Skipping to termination path\n"); return 0; } gossip_debug(GOSSIP_DIR_DEBUG, - "pvfs2_readdir called on %s (pos=%llu)\n", + "orangefs_readdir called on %s (pos=%llu)\n", dentry->d_name.name, llu(pos)); rhandle.buffer_index = -1; rhandle.dents_buf = NULL; memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); - new_op = op_alloc(PVFS2_VFS_OP_READDIR); + new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); if (!new_op) return -ENOMEM; new_op->uses_shared_memory = 1; - new_op->upcall.req.readdir.refn = pvfs2_inode->refn; + new_op->upcall.req.readdir.refn = orangefs_inode->refn; new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; gossip_debug(GOSSIP_DIR_DEBUG, @@ -187,14 +187,14 @@ static int pvfs2_readdir(struct file *file, struct dir_context *ctx) get_new_buffer_index: ret = readdir_index_get(&bufmap, &buffer_index); if (ret < 0) { - gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n", + gossip_lerr("orangefs_readdir: readdir_index_get() failure (%d)\n", ret); goto out_free_op; } new_op->upcall.req.readdir.buf_index = buffer_index; ret = service_operation(new_op, - "pvfs2_readdir", + "orangefs_readdir", get_interruptible_flag(dentry->d_inode)); gossip_debug(GOSSIP_DIR_DEBUG, @@ -238,7 +238,7 @@ get_new_buffer_index: new_op->downcall.trailer_size, buffer_index); if (bytes_decoded < 0) { - gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n", + gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", ret); ret = bytes_decoded; readdir_index_put(bufmap, buffer_index); @@ -246,7 +246,7 @@ get_new_buffer_index: } if (bytes_decoded != new_op->downcall.trailer_size) { - gossip_err("pvfs2_readdir: # bytes decoded (%ld) " + gossip_err("orangefs_readdir: # bytes decoded (%ld) " "!= trailer size (%ld)\n", bytes_decoded, (long)new_op->downcall.trailer_size); @@ 
-255,7 +255,7 @@ get_new_buffer_index: } /* - * pvfs2 doesn't actually store dot and dot-dot, but + * orangefs doesn't actually store dot and dot-dot, but * we need to have them represented. */ if (pos == 0) { @@ -279,19 +279,19 @@ get_new_buffer_index: } /* - * we stored PVFS_ITERATE_NEXT in ctx->pos last time around + * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around * to prevent "finding" dot and dot-dot on any iteration * other than the first. */ - if (ctx->pos == PVFS_ITERATE_NEXT) + if (ctx->pos == ORANGEFS_ITERATE_NEXT) ctx->pos = 0; for (i = ctx->pos; - i < rhandle.readdir_response.pvfs_dirent_outcount; + i < rhandle.readdir_response.orangefs_dirent_outcount; i++) { len = rhandle.readdir_response.dirent_array[i].d_length; current_entry = rhandle.readdir_response.dirent_array[i].d_name; - current_ino = pvfs2_khandle_to_ino( + current_ino = orangefs_khandle_to_ino( &(rhandle.readdir_response.dirent_array[i].khandle)); gossip_debug(GOSSIP_DIR_DEBUG, @@ -323,28 +323,28 @@ get_new_buffer_index: */ if (ret) { *ptoken = rhandle.readdir_response.token; - ctx->pos = PVFS_ITERATE_NEXT; + ctx->pos = ORANGEFS_ITERATE_NEXT; } /* * Did we hit the end of the directory? 
*/ - if (rhandle.readdir_response.token == PVFS_READDIR_END && + if (rhandle.readdir_response.token == ORANGEFS_READDIR_END && !buffer_full) { gossip_debug(GOSSIP_DIR_DEBUG, - "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); - ctx->pos = PVFS_READDIR_END; + "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); + ctx->pos = ORANGEFS_READDIR_END; } out_destroy_handle: readdir_handle_dtor(bufmap, &rhandle); out_free_op: op_release(new_op); - gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret); + gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); return ret; } -static int pvfs2_dir_open(struct inode *inode, struct file *file) +static int orangefs_dir_open(struct inode *inode, struct file *file) { __u64 *ptoken; @@ -353,21 +353,21 @@ static int pvfs2_dir_open(struct inode *inode, struct file *file) return -ENOMEM; ptoken = file->private_data; - *ptoken = PVFS_READDIR_START; + *ptoken = ORANGEFS_READDIR_START; return 0; } -static int pvfs2_dir_release(struct inode *inode, struct file *file) +static int orangefs_dir_release(struct inode *inode, struct file *file) { - pvfs2_flush_inode(inode); + orangefs_flush_inode(inode); kfree(file->private_data); return 0; } -/** PVFS2 implementation of VFS directory operations */ -const struct file_operations pvfs2_dir_operations = { +/** ORANGEFS implementation of VFS directory operations */ +const struct file_operations orangefs_dir_operations = { .read = generic_read_dir, - .iterate = pvfs2_readdir, - .open = pvfs2_dir_open, - .release = pvfs2_dir_release, + .iterate = orangefs_readdir, + .open = orangefs_dir_open, + .release = orangefs_dir_release, }; diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h index e372f446f6ba..72d4cac54821 100644 --- a/fs/orangefs/downcall.h +++ b/fs/orangefs/downcall.h @@ -15,42 +15,42 @@ * Sanitized the device-client core interaction * for clean 32-64 bit usage */ -struct pvfs2_io_response { +struct orangefs_io_response { 
__s64 amt_complete; }; -struct pvfs2_lookup_response { - struct pvfs2_object_kref refn; +struct orangefs_lookup_response { + struct orangefs_object_kref refn; }; -struct pvfs2_create_response { - struct pvfs2_object_kref refn; +struct orangefs_create_response { + struct orangefs_object_kref refn; }; -struct pvfs2_symlink_response { - struct pvfs2_object_kref refn; +struct orangefs_symlink_response { + struct orangefs_object_kref refn; }; -struct pvfs2_getattr_response { - struct PVFS_sys_attr_s attributes; - char link_target[PVFS2_NAME_LEN]; +struct orangefs_getattr_response { + struct ORANGEFS_sys_attr_s attributes; + char link_target[ORANGEFS_NAME_LEN]; }; -struct pvfs2_mkdir_response { - struct pvfs2_object_kref refn; +struct orangefs_mkdir_response { + struct orangefs_object_kref refn; }; /* * duplication of some system interface structures so that I don't have * to allocate extra memory */ -struct pvfs2_dirent { +struct orangefs_dirent { char *d_name; int d_length; - struct pvfs2_khandle khandle; + struct orangefs_khandle khandle; }; -struct pvfs2_statfs_response { +struct orangefs_statfs_response { __s64 block_size; __s64 blocks_total; __s64 blocks_avail; @@ -58,47 +58,47 @@ struct pvfs2_statfs_response { __s64 files_avail; }; -struct pvfs2_fs_mount_response { +struct orangefs_fs_mount_response { __s32 fs_id; __s32 id; - struct pvfs2_khandle root_khandle; + struct orangefs_khandle root_khandle; }; /* the getxattr response is the attribute value */ -struct pvfs2_getxattr_response { +struct orangefs_getxattr_response { __s32 val_sz; __s32 __pad1; - char val[PVFS_MAX_XATTR_VALUELEN]; + char val[ORANGEFS_MAX_XATTR_VALUELEN]; }; /* the listxattr response is an array of attribute names */ -struct pvfs2_listxattr_response { +struct orangefs_listxattr_response { __s32 returned_count; __s32 __pad1; __u64 token; - char key[PVFS_MAX_XATTR_LISTLEN * PVFS_MAX_XATTR_NAMELEN]; + char key[ORANGEFS_MAX_XATTR_LISTLEN * ORANGEFS_MAX_XATTR_NAMELEN]; __s32 keylen; __s32 __pad2; - 
__s32 lengths[PVFS_MAX_XATTR_LISTLEN]; + __s32 lengths[ORANGEFS_MAX_XATTR_LISTLEN]; }; -struct pvfs2_param_response { +struct orangefs_param_response { __s64 value; }; #define PERF_COUNT_BUF_SIZE 4096 -struct pvfs2_perf_count_response { +struct orangefs_perf_count_response { char buffer[PERF_COUNT_BUF_SIZE]; }; #define FS_KEY_BUF_SIZE 4096 -struct pvfs2_fs_key_response { +struct orangefs_fs_key_response { __s32 fs_keylen; __s32 __pad1; char fs_key[FS_KEY_BUF_SIZE]; }; -struct pvfs2_downcall_s { +struct orangefs_downcall_s { __s32 type; __s32 status; /* currently trailer is used only by readdir */ @@ -106,28 +106,28 @@ struct pvfs2_downcall_s { char *trailer_buf; union { - struct pvfs2_io_response io; - struct pvfs2_lookup_response lookup; - struct pvfs2_create_response create; - struct pvfs2_symlink_response sym; - struct pvfs2_getattr_response getattr; - struct pvfs2_mkdir_response mkdir; - struct pvfs2_statfs_response statfs; - struct pvfs2_fs_mount_response fs_mount; - struct pvfs2_getxattr_response getxattr; - struct pvfs2_listxattr_response listxattr; - struct pvfs2_param_response param; - struct pvfs2_perf_count_response perf_count; - struct pvfs2_fs_key_response fs_key; + struct orangefs_io_response io; + struct orangefs_lookup_response lookup; + struct orangefs_create_response create; + struct orangefs_symlink_response sym; + struct orangefs_getattr_response getattr; + struct orangefs_mkdir_response mkdir; + struct orangefs_statfs_response statfs; + struct orangefs_fs_mount_response fs_mount; + struct orangefs_getxattr_response getxattr; + struct orangefs_listxattr_response listxattr; + struct orangefs_param_response param; + struct orangefs_perf_count_response perf_count; + struct orangefs_fs_key_response fs_key; } resp; }; -struct pvfs2_readdir_response_s { +struct orangefs_readdir_response_s { __u64 token; __u64 directory_version; __u32 __pad2; - __u32 pvfs_dirent_outcount; - struct pvfs2_dirent *dirent_array; + __u32 orangefs_dirent_outcount; + struct 
orangefs_dirent *dirent_array; }; #endif /* __DOWNCALL_H */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 3a8140f289f6..ae5d8ed67ed5 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -29,7 +29,7 @@ do { \ * can futher be kernel-space or user-space addresses. * or it can pointers to struct page's */ -static int precopy_buffers(struct pvfs2_bufmap *bufmap, +static int precopy_buffers(struct orangefs_bufmap *bufmap, int buffer_index, struct iov_iter *iter, size_t total_size) @@ -42,10 +42,10 @@ static int precopy_buffers(struct pvfs2_bufmap *bufmap, if (total_size) { - ret = pvfs_bufmap_copy_from_iovec(bufmap, - iter, - buffer_index, - total_size); + ret = orangefs_bufmap_copy_from_iovec(bufmap, + iter, + buffer_index, + total_size); if (ret < 0) gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", __func__, @@ -66,7 +66,7 @@ static int precopy_buffers(struct pvfs2_bufmap *bufmap, * can futher be kernel-space or user-space addresses. * or it can pointers to struct page's */ -static int postcopy_buffers(struct pvfs2_bufmap *bufmap, +static int postcopy_buffers(struct orangefs_bufmap *bufmap, int buffer_index, struct iov_iter *iter, size_t total_size) @@ -78,10 +78,10 @@ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, * struct page pointers. */ if (total_size) { - ret = pvfs_bufmap_copy_to_iovec(bufmap, - iter, - buffer_index, - total_size); + ret = orangefs_bufmap_copy_to_iovec(bufmap, + iter, + buffer_index, + total_size); if (ret < 0) gossip_err("%s: Failed to copy-out buffers. 
Please make sure that the pvfs2-client is running (%ld)\n", __func__, @@ -93,34 +93,34 @@ static int postcopy_buffers(struct pvfs2_bufmap *bufmap, /* * Post and wait for the I/O upcall to finish */ -static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode, +static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, loff_t *offset, struct iov_iter *iter, size_t total_size, loff_t readahead_size) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; - struct pvfs2_bufmap *bufmap = NULL; - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; + struct orangefs_bufmap *bufmap = NULL; + struct orangefs_kernel_op_s *new_op = NULL; int buffer_index = -1; ssize_t ret; - new_op = op_alloc(PVFS2_VFS_OP_FILE_IO); + new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); if (!new_op) { ret = -ENOMEM; goto out; } /* synchronous I/O */ - new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO; + new_op->upcall.req.io.async_vfs_io = ORANGEFS_VFS_SYNC_IO; new_op->upcall.req.io.readahead_size = readahead_size; new_op->upcall.req.io.io_type = type; - new_op->upcall.req.io.refn = pvfs2_inode->refn; + new_op->upcall.req.io.refn = orangefs_inode->refn; populate_shared_memory: /* get a shared buffer index */ - ret = pvfs_bufmap_get(&bufmap, &buffer_index); + ret = orangefs_bufmap_get(&bufmap, &buffer_index); if (ret < 0) { gossip_debug(GOSSIP_FILE_DEBUG, - "%s: pvfs_bufmap_get failure (%ld)\n", + "%s: orangefs_bufmap_get failure (%ld)\n", __func__, (long)ret); goto out; } @@ -146,7 +146,7 @@ populate_shared_memory: * Stage 1: copy the buffers into client-core's address space * precopy_buffers only pertains to writes. 
*/ - if (type == PVFS_IO_WRITE) { + if (type == ORANGEFS_IO_WRITE) { ret = precopy_buffers(bufmap, buffer_index, iter, @@ -163,14 +163,14 @@ populate_shared_memory: /* Stage 2: Service the I/O operation */ ret = service_operation(new_op, - type == PVFS_IO_WRITE ? + type == ORANGEFS_IO_WRITE ? "file_write" : "file_read", get_interruptible_flag(inode)); /* * If service_operation() returns -EAGAIN #and# the operation was - * purged from pvfs2_request_list or htable_ops_in_progress, then + * purged from orangefs_request_list or htable_ops_in_progress, then * we know that the client was restarted, causing the shared memory * area to be wiped clean. To restart a write operation in this * case, we must re-copy the data from the user's iovec to a NEW @@ -178,7 +178,7 @@ populate_shared_memory: * a new shared memory location. */ if (ret == -EAGAIN && op_state_purged(new_op)) { - pvfs_bufmap_put(bufmap, buffer_index); + orangefs_bufmap_put(bufmap, buffer_index); gossip_debug(GOSSIP_FILE_DEBUG, "%s:going to repopulate_shared_memory.\n", __func__); @@ -199,7 +199,7 @@ populate_shared_memory: else gossip_err("%s: error in %s handle %pU, returning %zd\n", __func__, - type == PVFS_IO_READ ? + type == ORANGEFS_IO_READ ? "read from" : "write to", handle, ret); goto out; @@ -209,7 +209,7 @@ populate_shared_memory: * Stage 3: Post copy buffers from client-core's address space * postcopy_buffers only pertains to reads. */ - if (type == PVFS_IO_READ) { + if (type == ORANGEFS_IO_READ) { ret = postcopy_buffers(bufmap, buffer_index, iter, @@ -243,7 +243,7 @@ populate_shared_memory: out: if (buffer_index >= 0) { - pvfs_bufmap_put(bufmap, buffer_index); + orangefs_bufmap_put(bufmap, buffer_index); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): PUT buffer_index %d\n", __func__, handle, buffer_index); @@ -263,12 +263,12 @@ out: * augmented/extended metadata attached to the file. * Note: File extended attributes override any mount options. 
*/ -static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, +static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, loff_t *offset, struct iov_iter *iter) { struct inode *inode = file->f_mapping->host; - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; size_t count = iov_iter_count(iter); ssize_t total_count = 0; ssize_t ret = -EINVAL; @@ -279,7 +279,7 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)count); - if (type == PVFS_IO_WRITE) { + if (type == ORANGEFS_IO_WRITE) { gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): proceeding with offset : %llu, " "size %d\n", @@ -299,8 +299,8 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, size_t amt_complete; /* how much to transfer in this loop iteration */ - if (each_count > pvfs_bufmap_size_query()) - each_count = pvfs_bufmap_size_query(); + if (each_count > orangefs_bufmap_size_query()) + each_count = orangefs_bufmap_size_query(); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): size of each_count(%d)\n", @@ -346,10 +346,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, ret = total_count; out: if (ret > 0) { - if (type == PVFS_IO_READ) { + if (type == ORANGEFS_IO_READ) { file_accessed(file); } else { - SetMtimeFlag(pvfs2_inode); + SetMtimeFlag(orangefs_inode); inode->i_mtime = CURRENT_TIME; mark_inode_dirty_sync(inode); } @@ -368,19 +368,19 @@ out: * Read data from a specified offset in a file (referenced by inode). * Data may be placed either in a user or kernel buffer. 
*/ -ssize_t pvfs2_inode_read(struct inode *inode, - struct iov_iter *iter, - loff_t *offset, - loff_t readahead_size) +ssize_t orangefs_inode_read(struct inode *inode, + struct iov_iter *iter, + loff_t *offset, + loff_t readahead_size) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); size_t count = iov_iter_count(iter); size_t bufmap_size; ssize_t ret = -EINVAL; - g_pvfs2_stats.reads++; + g_orangefs_stats.reads++; - bufmap_size = pvfs_bufmap_size_query(); + bufmap_size = orangefs_bufmap_size_query(); if (count > bufmap_size) { gossip_debug(GOSSIP_FILE_DEBUG, "%s: count is too large (%zd/%zd)!\n", @@ -391,11 +391,11 @@ ssize_t pvfs2_inode_read(struct inode *inode, gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU) %zd@%llu\n", __func__, - &pvfs2_inode->refn.khandle, + &orangefs_inode->refn.khandle, count, llu(*offset)); - ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, iter, + ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter, count, readahead_size); if (ret > 0) *offset += ret; @@ -403,13 +403,13 @@ ssize_t pvfs2_inode_read(struct inode *inode, gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): Value(%zd) returned.\n", __func__, - &pvfs2_inode->refn.khandle, + &orangefs_inode->refn.khandle, ret); return ret; } -static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; loff_t pos = *(&iocb->ki_pos); @@ -417,17 +417,17 @@ static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) BUG_ON(iocb->private); - gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_file_read_iter\n"); + gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n"); - g_pvfs2_stats.reads++; + g_orangefs_stats.reads++; - rc = do_readv_writev(PVFS_IO_READ, file, &pos, iter); + rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter); iocb->ki_pos = pos; return rc; } -static ssize_t 
pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) +static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; loff_t pos; @@ -435,23 +435,23 @@ static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) BUG_ON(iocb->private); - gossip_debug(GOSSIP_FILE_DEBUG, "pvfs2_file_write_iter\n"); + gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n"); mutex_lock(&file->f_mapping->host->i_mutex); /* Make sure generic_write_checks sees an up to date inode size. */ if (file->f_flags & O_APPEND) { - rc = pvfs2_inode_getattr(file->f_mapping->host, - PVFS_ATTR_SYS_SIZE); + rc = orangefs_inode_getattr(file->f_mapping->host, + ORANGEFS_ATTR_SYS_SIZE); if (rc) { - gossip_err("%s: pvfs2_inode_getattr failed, rc:%zd:.\n", + gossip_err("%s: orangefs_inode_getattr failed, rc:%zd:.\n", __func__, rc); goto out; } } if (file->f_pos > i_size_read(file->f_mapping->host)) - pvfs2_i_size_write(file->f_mapping->host, file->f_pos); + orangefs_i_size_write(file->f_mapping->host, file->f_pos); rc = generic_write_checks(iocb, iter); @@ -468,7 +468,7 @@ static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) */ pos = *(&iocb->ki_pos); - rc = do_readv_writev(PVFS_IO_WRITE, + rc = do_readv_writev(ORANGEFS_IO_WRITE, file, &pos, iter); @@ -479,7 +479,7 @@ static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) } iocb->ki_pos = pos; - g_pvfs2_stats.writes++; + g_orangefs_stats.writes++; out: @@ -490,14 +490,14 @@ out: /* * Perform a miscellaneous operation on a file. 
*/ -static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret = -ENOTTY; __u64 val = 0; unsigned long uval; gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_ioctl: called with cmd %d\n", + "orangefs_ioctl: called with cmd %d\n", cmd); /* @@ -506,17 +506,17 @@ static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) */ if (cmd == FS_IOC_GETFLAGS) { val = 0; - ret = pvfs2_inode_getxattr(file_inode(file), - PVFS2_XATTR_NAME_DEFAULT_PREFIX, - "user.pvfs2.meta_hint", - &val, sizeof(val)); + ret = orangefs_inode_getxattr(file_inode(file), + ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, + "user.pvfs2.meta_hint", + &val, sizeof(val)); if (ret < 0 && ret != -ENODATA) return ret; else if (ret == -ENODATA) val = 0; uval = val; gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_ioctl: FS_IOC_GETFLAGS: %llu\n", + "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", (unsigned long long)uval); return put_user(uval, (int __user *)arg); } else if (cmd == FS_IOC_SETFLAGS) { @@ -524,25 +524,25 @@ static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (get_user(uval, (int __user *)arg)) return -EFAULT; /* - * PVFS_MIRROR_FL is set internally when the mirroring mode + * ORANGEFS_MIRROR_FL is set internally when the mirroring mode * is turned on for a file. The user is not allowed to turn * on this bit, but the bit is present if the user first gets * the flags and then updates the flags with some new * settings. So, we ignore it in the following edit. bligon. 
*/ - if ((uval & ~PVFS_MIRROR_FL) & + if ((uval & ~ORANGEFS_MIRROR_FL) & (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) { - gossip_err("pvfs2_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); + gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); return -EINVAL; } val = uval; gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_ioctl: FS_IOC_SETFLAGS: %llu\n", + "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", (unsigned long long)val); - ret = pvfs2_inode_setxattr(file_inode(file), - PVFS2_XATTR_NAME_DEFAULT_PREFIX, - "user.pvfs2.meta_hint", - &val, sizeof(val), 0); + ret = orangefs_inode_setxattr(file_inode(file), + ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, + "user.pvfs2.meta_hint", + &val, sizeof(val), 0); } return ret; @@ -551,10 +551,10 @@ static long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* * Memory map a region of a file. */ -static int pvfs2_file_mmap(struct file *file, struct vm_area_struct *vma) +static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) { gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_file_mmap: called on %s\n", + "orangefs_file_mmap: called on %s\n", (file ? (char *)file->f_path.dentry->d_name.name : (char *)"Unknown")); @@ -575,13 +575,13 @@ static int pvfs2_file_mmap(struct file *file, struct vm_area_struct *vma) * * \note Not called when each file is closed. 
*/ -static int pvfs2_file_release(struct inode *inode, struct file *file) +static int orangefs_file_release(struct inode *inode, struct file *file) { gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_file_release: called on %s\n", + "orangefs_file_release: called on %s\n", file->f_path.dentry->d_name.name); - pvfs2_flush_inode(inode); + orangefs_flush_inode(inode); /* * remove all associated inode pages from the page cache and mmap @@ -599,35 +599,35 @@ static int pvfs2_file_release(struct inode *inode, struct file *file) /* * Push all data for a specific file onto permanent storage. */ -static int pvfs2_fsync(struct file *file, +static int orangefs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret = -EINVAL; - struct pvfs2_inode_s *pvfs2_inode = - PVFS2_I(file->f_path.dentry->d_inode); - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_inode_s *orangefs_inode = + ORANGEFS_I(file->f_path.dentry->d_inode); + struct orangefs_kernel_op_s *new_op = NULL; /* required call */ filemap_write_and_wait_range(file->f_mapping, start, end); - new_op = op_alloc(PVFS2_VFS_OP_FSYNC); + new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); if (!new_op) return -ENOMEM; - new_op->upcall.req.fsync.refn = pvfs2_inode->refn; + new_op->upcall.req.fsync.refn = orangefs_inode->refn; ret = service_operation(new_op, - "pvfs2_fsync", + "orangefs_fsync", get_interruptible_flag(file->f_path.dentry->d_inode)); gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_fsync got return value of %d\n", + "orangefs_fsync got return value of %d\n", ret); op_release(new_op); - pvfs2_flush_inode(file->f_path.dentry->d_inode); + orangefs_flush_inode(file->f_path.dentry->d_inode); return ret; } @@ -640,36 +640,36 @@ static int pvfs2_fsync(struct file *file, * Future upgrade could support SEEK_DATA and SEEK_HOLE but would * require much changes to the FS */ -static loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int 
origin) { int ret = -EINVAL; struct inode *inode = file->f_path.dentry->d_inode; if (!inode) { - gossip_err("pvfs2_file_llseek: invalid inode (NULL)\n"); + gossip_err("orangefs_file_llseek: invalid inode (NULL)\n"); return ret; } - if (origin == PVFS2_SEEK_END) { + if (origin == ORANGEFS_SEEK_END) { /* * revalidate the inode's file size. * NOTE: We are only interested in file size here, * so we set mask accordingly. */ - ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_SIZE); + ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE); if (ret) { gossip_debug(GOSSIP_FILE_DEBUG, "%s:%s:%d calling make bad inode\n", __FILE__, __func__, __LINE__); - pvfs2_make_bad_inode(inode); + orangefs_make_bad_inode(inode); return ret; } } gossip_debug(GOSSIP_FILE_DEBUG, - "pvfs2_file_llseek: offset is %ld | origin is %d" + "orangefs_file_llseek: offset is %ld | origin is %d" " | inode size is %lu\n", (long)offset, origin, @@ -682,11 +682,11 @@ static loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin) * Support local locks (locks that only this kernel knows about) * if Orangefs was mounted -o local_lock. 
*/ -static int pvfs2_lock(struct file *filp, int cmd, struct file_lock *fl) +static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) { int rc = -EINVAL; - if (PVFS2_SB(filp->f_inode->i_sb)->flags & PVFS2_OPT_LOCAL_LOCK) { + if (ORANGEFS_SB(filp->f_inode->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) { if (cmd == F_GETLK) { rc = 0; posix_test_lock(filp, fl); @@ -698,15 +698,15 @@ static int pvfs2_lock(struct file *filp, int cmd, struct file_lock *fl) return rc; } -/** PVFS2 implementation of VFS file operations */ -const struct file_operations pvfs2_file_operations = { - .llseek = pvfs2_file_llseek, - .read_iter = pvfs2_file_read_iter, - .write_iter = pvfs2_file_write_iter, - .lock = pvfs2_lock, - .unlocked_ioctl = pvfs2_ioctl, - .mmap = pvfs2_file_mmap, +/** ORANGEFS implementation of VFS file operations */ +const struct file_operations orangefs_file_operations = { + .llseek = orangefs_file_llseek, + .read_iter = orangefs_file_read_iter, + .write_iter = orangefs_file_write_iter, + .lock = orangefs_lock, + .unlocked_ioctl = orangefs_ioctl, + .mmap = orangefs_file_mmap, .open = generic_file_open, - .release = pvfs2_file_release, - .fsync = pvfs2_fsync, + .release = orangefs_file_release, + .fsync = orangefs_fsync, }; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 70d1c1925ea3..58e83182d3dc 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -26,7 +26,7 @@ static int read_one_page(struct page *page) iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE); gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_readpage called with page %p\n", + "orangefs_readpage called with page %p\n", page); max_block = ((inode->i_size / blocksize) + 1); @@ -34,10 +34,10 @@ static int read_one_page(struct page *page) if (page->index < max_block) { loff_t blockptr_offset = (((loff_t) page->index) << blockbits); - bytes_read = pvfs2_inode_read(inode, - &to, - &blockptr_offset, - inode->i_size); + bytes_read = orangefs_inode_read(inode, + &to, + &blockptr_offset, 
+ inode->i_size); } /* this will only zero remaining unread portions of the page data */ iov_iter_zero(~0U, &to); @@ -57,12 +57,12 @@ static int read_one_page(struct page *page) return ret; } -static int pvfs2_readpage(struct file *file, struct page *page) +static int orangefs_readpage(struct file *file, struct page *page) { return read_one_page(page); } -static int pvfs2_readpages(struct file *file, +static int orangefs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) @@ -70,7 +70,7 @@ static int pvfs2_readpages(struct file *file, int page_idx; int ret; - gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_readpages called\n"); + gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n"); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page; @@ -93,12 +93,12 @@ static int pvfs2_readpages(struct file *file, return 0; } -static void pvfs2_invalidatepage(struct page *page, +static void orangefs_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_invalidatepage called on page %p " + "orangefs_invalidatepage called on page %p " "(offset is %u)\n", page, offset); @@ -109,10 +109,10 @@ static void pvfs2_invalidatepage(struct page *page, } -static int pvfs2_releasepage(struct page *page, gfp_t foo) +static int orangefs_releasepage(struct page *page, gfp_t foo) { gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_releasepage called on page %p\n", + "orangefs_releasepage called on page %p\n", page); return 0; } @@ -131,32 +131,32 @@ static int pvfs2_releasepage(struct page *page, gfp_t foo) * loff_t offset) *{ * gossip_debug(GOSSIP_INODE_DEBUG, - * "pvfs2_direct_IO: %s\n", + * "orangefs_direct_IO: %s\n", * iocb->ki_filp->f_path.dentry->d_name.name); * * return -EINVAL; *} */ -struct backing_dev_info pvfs2_backing_dev_info = { - .name = "pvfs2", +struct backing_dev_info orangefs_backing_dev_info = { + .name = "orangefs", .ra_pages = 0, .capabilities = 
BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, }; -/** PVFS2 implementation of address space operations */ -const struct address_space_operations pvfs2_address_operations = { - .readpage = pvfs2_readpage, - .readpages = pvfs2_readpages, - .invalidatepage = pvfs2_invalidatepage, - .releasepage = pvfs2_releasepage, +/** ORANGEFS2 implementation of address space operations */ +const struct address_space_operations orangefs_address_operations = { + .readpage = orangefs_readpage, + .readpages = orangefs_readpages, + .invalidatepage = orangefs_invalidatepage, + .releasepage = orangefs_releasepage, /* .direct_IO = pvfs2_direct_IO */ }; -static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr) +static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; loff_t orig_size = i_size_read(inode); int ret = -EINVAL; @@ -164,17 +164,17 @@ static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr) "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n", __func__, get_khandle_from_ino(inode), - &pvfs2_inode->refn.khandle, - pvfs2_inode->refn.fs_id, + &orangefs_inode->refn.khandle, + orangefs_inode->refn.fs_id, iattr->ia_size); truncate_setsize(inode, iattr->ia_size); - new_op = op_alloc(PVFS2_VFS_OP_TRUNCATE); + new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE); if (!new_op) return -ENOMEM; - new_op->upcall.req.truncate.refn = pvfs2_inode->refn; + new_op->upcall.req.truncate.refn = orangefs_inode->refn; new_op->upcall.req.truncate.size = (__s64) iattr->ia_size; ret = service_operation(new_op, __func__, @@ -185,7 +185,7 @@ static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr) * the status value tells us if it went through ok or not */ gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2: pvfs2_truncate got return value of %d\n", + "orangefs: 
orangefs_truncate got return value of %d\n", ret); op_release(new_op); @@ -216,13 +216,13 @@ static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr) /* * Change attributes of an object referenced by dentry. */ -int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) +int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) { int ret = -EINVAL; struct inode *inode = dentry->d_inode; gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_setattr: called on %s\n", + "orangefs_setattr: called on %s\n", dentry->d_name.name); ret = inode_change_ok(inode, iattr); @@ -231,7 +231,7 @@ int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { - ret = pvfs2_setattr_size(inode, iattr); + ret = orangefs_setattr_size(inode, iattr); if (ret) goto out; } @@ -239,9 +239,9 @@ int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) setattr_copy(inode, iattr); mark_inode_dirty(inode); - ret = pvfs2_inode_setattr(inode, iattr); + ret = orangefs_inode_setattr(inode, iattr); gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_setattr: inode_setattr returned %d\n", + "orangefs_setattr: inode_setattr returned %d\n", ret); if (!ret && (iattr->ia_valid & ATTR_MODE)) @@ -249,23 +249,23 @@ int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) ret = posix_acl_chmod(inode, inode->i_mode); out: - gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: returning %d\n", ret); + gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", ret); return ret; } /* * Obtain attributes of an object given a dentry */ -int pvfs2_getattr(struct vfsmount *mnt, +int orangefs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat) { int ret = -ENOENT; struct inode *inode = dentry->d_inode; - struct pvfs2_inode_s *pvfs2_inode = NULL; + struct orangefs_inode_s *orangefs_inode = NULL; gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_getattr: called on %s\n", + "orangefs_getattr: called on %s\n", 
dentry->d_name.name); /* @@ -273,12 +273,12 @@ int pvfs2_getattr(struct vfsmount *mnt, * fields/attributes of the inode would be refreshed. So again, we * dont have too much of a choice but refresh all the attributes. */ - ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); if (ret == 0) { generic_fillattr(inode, kstat); /* override block size reported to stat */ - pvfs2_inode = PVFS2_I(inode); - kstat->blksize = pvfs2_inode->blksize; + orangefs_inode = ORANGEFS_I(inode); + kstat->blksize = orangefs_inode->blksize; } else { /* assume an I/O error and flag inode as bad */ gossip_debug(GOSSIP_INODE_DEBUG, @@ -286,39 +286,39 @@ int pvfs2_getattr(struct vfsmount *mnt, __FILE__, __func__, __LINE__); - pvfs2_make_bad_inode(inode); + orangefs_make_bad_inode(inode); } return ret; } -/* PVFS2 implementation of VFS inode operations for files */ -struct inode_operations pvfs2_file_inode_operations = { - .get_acl = pvfs2_get_acl, - .set_acl = pvfs2_set_acl, - .setattr = pvfs2_setattr, - .getattr = pvfs2_getattr, +/* ORANGEDS2 implementation of VFS inode operations for files */ +struct inode_operations orangefs_file_inode_operations = { + .get_acl = orangefs_get_acl, + .set_acl = orangefs_set_acl, + .setattr = orangefs_setattr, + .getattr = orangefs_getattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = pvfs2_listxattr, + .listxattr = orangefs_listxattr, .removexattr = generic_removexattr, }; -static int pvfs2_init_iops(struct inode *inode) +static int orangefs_init_iops(struct inode *inode) { - inode->i_mapping->a_ops = &pvfs2_address_operations; + inode->i_mapping->a_ops = &orangefs_address_operations; switch (inode->i_mode & S_IFMT) { case S_IFREG: - inode->i_op = &pvfs2_file_inode_operations; - inode->i_fop = &pvfs2_file_operations; + inode->i_op = &orangefs_file_inode_operations; + inode->i_fop = &orangefs_file_operations; inode->i_blkbits = PAGE_CACHE_SHIFT; break; 
case S_IFLNK: - inode->i_op = &pvfs2_symlink_inode_operations; + inode->i_op = &orangefs_symlink_inode_operations; break; case S_IFDIR: - inode->i_op = &pvfs2_dir_inode_operations; - inode->i_fop = &pvfs2_dir_operations; + inode->i_op = &orangefs_dir_inode_operations; + inode->i_fop = &orangefs_dir_operations; break; default: gossip_debug(GOSSIP_INODE_DEBUG, @@ -331,75 +331,75 @@ static int pvfs2_init_iops(struct inode *inode) } /* - * Given a PVFS2 object identifier (fsid, handle), convert it into a ino_t type + * Given a ORANGEFS object identifier (fsid, handle), convert it into a ino_t type * that will be used as a hash-index from where the handle will * be searched for in the VFS hash table of inodes. */ -static inline ino_t pvfs2_handle_hash(struct pvfs2_object_kref *ref) +static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref) { if (!ref) return 0; - return pvfs2_khandle_to_ino(&(ref->khandle)); + return orangefs_khandle_to_ino(&(ref->khandle)); } /* * Called to set up an inode from iget5_locked. */ -static int pvfs2_set_inode(struct inode *inode, void *data) +static int orangefs_set_inode(struct inode *inode, void *data) { - struct pvfs2_object_kref *ref = (struct pvfs2_object_kref *) data; - struct pvfs2_inode_s *pvfs2_inode = NULL; + struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; + struct orangefs_inode_s *orangefs_inode = NULL; /* Make sure that we have sane parameters */ if (!data || !inode) return 0; - pvfs2_inode = PVFS2_I(inode); - if (!pvfs2_inode) + orangefs_inode = ORANGEFS_I(inode); + if (!orangefs_inode) return 0; - pvfs2_inode->refn.fs_id = ref->fs_id; - pvfs2_inode->refn.khandle = ref->khandle; + orangefs_inode->refn.fs_id = ref->fs_id; + orangefs_inode->refn.khandle = ref->khandle; return 0; } /* * Called to determine if handles match. 
*/ -static int pvfs2_test_inode(struct inode *inode, void *data) +static int orangefs_test_inode(struct inode *inode, void *data) { - struct pvfs2_object_kref *ref = (struct pvfs2_object_kref *) data; - struct pvfs2_inode_s *pvfs2_inode = NULL; + struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; + struct orangefs_inode_s *orangefs_inode = NULL; - pvfs2_inode = PVFS2_I(inode); - return (!PVFS_khandle_cmp(&(pvfs2_inode->refn.khandle), &(ref->khandle)) - && pvfs2_inode->refn.fs_id == ref->fs_id); + orangefs_inode = ORANGEFS_I(inode); + return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), &(ref->khandle)) + && orangefs_inode->refn.fs_id == ref->fs_id); } /* - * Front-end to lookup the inode-cache maintained by the VFS using the PVFS2 + * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS * file handle. * * @sb: the file system super block instance. - * @ref: The PVFS2 object for which we are trying to locate an inode structure. + * @ref: The ORANGEFS object for which we are trying to locate an inode structure. 
*/ -struct inode *pvfs2_iget(struct super_block *sb, struct pvfs2_object_kref *ref) +struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref *ref) { struct inode *inode = NULL; unsigned long hash; int error; - hash = pvfs2_handle_hash(ref); - inode = iget5_locked(sb, hash, pvfs2_test_inode, pvfs2_set_inode, ref); + hash = orangefs_handle_hash(ref); + inode = iget5_locked(sb, hash, orangefs_test_inode, orangefs_set_inode, ref); if (!inode || !(inode->i_state & I_NEW)) return inode; - error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); if (error) { iget_failed(inode); return ERR_PTR(error); } inode->i_ino = hash; /* needed for stat etc */ - pvfs2_init_iops(inode); + orangefs_init_iops(inode); unlock_new_inode(inode); gossip_debug(GOSSIP_INODE_DEBUG, @@ -415,15 +415,15 @@ struct inode *pvfs2_iget(struct super_block *sb, struct pvfs2_object_kref *ref) /* * Allocate an inode for a newly created file and insert it into the inode hash. 
*/ -struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir, - int mode, dev_t dev, struct pvfs2_object_kref *ref) +struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, + int mode, dev_t dev, struct orangefs_object_kref *ref) { - unsigned long hash = pvfs2_handle_hash(ref); + unsigned long hash = orangefs_handle_hash(ref); struct inode *inode; int error; gossip_debug(GOSSIP_INODE_DEBUG, - "pvfs2_get_custom_inode_common: called\n" + "orangefs_get_custom_inode_common: called\n" "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n", sb, MAJOR(dev), @@ -434,14 +434,14 @@ struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir, if (!inode) return NULL; - pvfs2_set_inode(inode, ref); + orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ - error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); + error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); if (error) goto out_iput; - pvfs2_init_iops(inode); + orangefs_init_iops(inode); inode->i_mode = mode; inode->i_uid = current_fsuid(); @@ -450,14 +450,14 @@ struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir, inode->i_size = PAGE_CACHE_SIZE; inode->i_rdev = dev; - error = insert_inode_locked4(inode, hash, pvfs2_test_inode, ref); + error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref); if (error < 0) goto out_iput; gossip_debug(GOSSIP_INODE_DEBUG, "Initializing ACL's for inode %pU\n", get_khandle_from_ino(inode)); - pvfs2_init_acl(inode, dir); + orangefs_init_acl(inode, dir); return inode; out_iput: diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 39f96ace0289..333c87c8b0f5 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -14,34 +14,34 @@ /* * Get a newly allocated inode to go with a negative dentry. 
*/ -static int pvfs2_create(struct inode *dir, +static int orangefs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool exclusive) { - struct pvfs2_inode_s *parent = PVFS2_I(dir); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *parent = ORANGEFS_I(dir); + struct orangefs_kernel_op_s *new_op; struct inode *inode; int ret; gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__); - new_op = op_alloc(PVFS2_VFS_OP_CREATE); + new_op = op_alloc(ORANGEFS_VFS_OP_CREATE); if (!new_op) return -ENOMEM; new_op->upcall.req.create.parent_refn = parent->refn; fill_default_sys_attrs(new_op->upcall.req.create.attributes, - PVFS_TYPE_METAFILE, mode); + ORANGEFS_TYPE_METAFILE, mode); strncpy(new_op->upcall.req.create.d_name, - dentry->d_name.name, PVFS2_NAME_LEN); + dentry->d_name.name, ORANGEFS_NAME_LEN); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, - "Create Got PVFS2 handle %pU on fsid %d (ret=%d)\n", + "Create Got ORANGEFS handle %pU on fsid %d (ret=%d)\n", &new_op->downcall.resp.create.refn.khandle, new_op->downcall.resp.create.refn.fs_id, ret); @@ -52,10 +52,10 @@ static int pvfs2_create(struct inode *dir, goto out; } - inode = pvfs2_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, + inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, &new_op->downcall.resp.create.refn); if (IS_ERR(inode)) { - gossip_err("*** Failed to allocate pvfs2 file inode\n"); + gossip_err("*** Failed to allocate orangefs file inode\n"); ret = PTR_ERR(inode); goto out; } @@ -86,11 +86,11 @@ out: * Attempt to resolve an object name (dentry->d_name), parent handle, and * fsid into a handle for the object. 
*/ -static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry, +static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct pvfs2_inode_s *parent = PVFS2_I(dir); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *parent = ORANGEFS_I(dir); + struct orangefs_kernel_op_s *new_op; struct inode *inode; struct dentry *res; int ret = -EINVAL; @@ -106,10 +106,10 @@ static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry, gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n", __func__, dentry->d_name.name); - if (dentry->d_name.len > (PVFS2_NAME_LEN - 1)) + if (dentry->d_name.len > (ORANGEFS_NAME_LEN - 1)) return ERR_PTR(-ENAMETOOLONG); - new_op = op_alloc(PVFS2_VFS_OP_LOOKUP); + new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); if (!new_op) return ERR_PTR(-ENOMEM); @@ -123,7 +123,7 @@ static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry, new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - PVFS2_NAME_LEN); + ORANGEFS_NAME_LEN); gossip_debug(GOSSIP_NAME_DEBUG, "%s: doing lookup on %s under %pU,%d (follow=%s)\n", @@ -132,7 +132,7 @@ static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry, &new_op->upcall.req.lookup.parent_refn.khandle, new_op->upcall.req.lookup.parent_refn.fs_id, ((new_op->upcall.req.lookup.sym_follow == - PVFS2_LOOKUP_LINK_FOLLOW) ? "yes" : "no")); + ORANGEFS_LOOKUP_LINK_FOLLOW) ? 
"yes" : "no")); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -158,7 +158,7 @@ static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry, */ gossip_debug(GOSSIP_NAME_DEBUG, - "pvfs2_lookup: Adding *negative* dentry " + "orangefs_lookup: Adding *negative* dentry " "%p for %s\n", dentry, dentry->d_name.name); @@ -173,7 +173,7 @@ static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry, goto out; } - inode = pvfs2_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn); + inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn); if (IS_ERR(inode)) { gossip_debug(GOSSIP_NAME_DEBUG, "error %ld from iget\n", PTR_ERR(inode)); @@ -202,11 +202,11 @@ out: } /* return 0 on success; non-zero otherwise */ -static int pvfs2_unlink(struct inode *dir, struct dentry *dentry) +static int orangefs_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct pvfs2_inode_s *parent = PVFS2_I(dir); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *parent = ORANGEFS_I(dir); + struct orangefs_kernel_op_s *new_op; int ret; gossip_debug(GOSSIP_NAME_DEBUG, @@ -218,15 +218,15 @@ static int pvfs2_unlink(struct inode *dir, struct dentry *dentry) &parent->refn.khandle, parent->refn.fs_id); - new_op = op_alloc(PVFS2_VFS_OP_REMOVE); + new_op = op_alloc(ORANGEFS_VFS_OP_REMOVE); if (!new_op) return -ENOMEM; new_op->upcall.req.remove.parent_refn = parent->refn; strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, - PVFS2_NAME_LEN); + ORANGEFS_NAME_LEN); - ret = service_operation(new_op, "pvfs2_unlink", + ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); /* when request is serviced properly, free req op struct */ @@ -242,12 +242,12 @@ static int pvfs2_unlink(struct inode *dir, struct dentry *dentry) return ret; } -static int pvfs2_symlink(struct inode *dir, +static int orangefs_symlink(struct inode *dir, struct dentry *dentry, const char 
*symname) { - struct pvfs2_inode_s *parent = PVFS2_I(dir); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *parent = ORANGEFS_I(dir); + struct orangefs_kernel_op_s *new_op; struct inode *inode; int mode = 755; int ret; @@ -257,25 +257,25 @@ static int pvfs2_symlink(struct inode *dir, if (!symname) return -EINVAL; - new_op = op_alloc(PVFS2_VFS_OP_SYMLINK); + new_op = op_alloc(ORANGEFS_VFS_OP_SYMLINK); if (!new_op) return -ENOMEM; new_op->upcall.req.sym.parent_refn = parent->refn; fill_default_sys_attrs(new_op->upcall.req.sym.attributes, - PVFS_TYPE_SYMLINK, + ORANGEFS_TYPE_SYMLINK, mode); strncpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name, - PVFS2_NAME_LEN); - strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN); + ORANGEFS_NAME_LEN); + strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_LEN); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, - "Symlink Got PVFS2 handle %pU on fsid %d (ret=%d)\n", + "Symlink Got ORANGEFS handle %pU on fsid %d (ret=%d)\n", &new_op->downcall.resp.sym.refn.khandle, new_op->downcall.resp.sym.refn.fs_id, ret); @@ -286,11 +286,11 @@ static int pvfs2_symlink(struct inode *dir, goto out; } - inode = pvfs2_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, + inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, &new_op->downcall.resp.sym.refn); if (IS_ERR(inode)) { gossip_err - ("*** Failed to allocate pvfs2 symlink inode\n"); + ("*** Failed to allocate orangefs symlink inode\n"); ret = PTR_ERR(inode); goto out; } @@ -316,29 +316,29 @@ out: return ret; } -static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct pvfs2_inode_s *parent = PVFS2_I(dir); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *parent = ORANGEFS_I(dir); + struct orangefs_kernel_op_s *new_op; struct inode *inode; int ret; - new_op = 
op_alloc(PVFS2_VFS_OP_MKDIR); + new_op = op_alloc(ORANGEFS_VFS_OP_MKDIR); if (!new_op) return -ENOMEM; new_op->upcall.req.mkdir.parent_refn = parent->refn; fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes, - PVFS_TYPE_DIRECTORY, mode); + ORANGEFS_TYPE_DIRECTORY, mode); strncpy(new_op->upcall.req.mkdir.d_name, - dentry->d_name.name, PVFS2_NAME_LEN); + dentry->d_name.name, ORANGEFS_NAME_LEN); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, - "Mkdir Got PVFS2 handle %pU on fsid %d\n", + "Mkdir Got ORANGEFS handle %pU on fsid %d\n", &new_op->downcall.resp.mkdir.refn.khandle, new_op->downcall.resp.mkdir.refn.fs_id); @@ -349,10 +349,10 @@ static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } - inode = pvfs2_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, + inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, &new_op->downcall.resp.mkdir.refn); if (IS_ERR(inode)) { - gossip_err("*** Failed to allocate pvfs2 dir inode\n"); + gossip_err("*** Failed to allocate orangefs dir inode\n"); ret = PTR_ERR(inode); goto out; } @@ -381,42 +381,42 @@ out: return ret; } -static int pvfs2_rename(struct inode *old_dir, +static int orangefs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct pvfs2_kernel_op_s *new_op; + struct orangefs_kernel_op_s *new_op; int ret; gossip_debug(GOSSIP_NAME_DEBUG, - "pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n", + "orangefs_rename: called (%s/%s => %s/%s) ct=%d\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, d_count(new_dentry)); - new_op = op_alloc(PVFS2_VFS_OP_RENAME); + new_op = op_alloc(ORANGEFS_VFS_OP_RENAME); if (!new_op) return -EINVAL; - new_op->upcall.req.rename.old_parent_refn = PVFS2_I(old_dir)->refn; - new_op->upcall.req.rename.new_parent_refn = PVFS2_I(new_dir)->refn; + 
new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn; + new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn; strncpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name, - PVFS2_NAME_LEN); + ORANGEFS_NAME_LEN); strncpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name, - PVFS2_NAME_LEN); + ORANGEFS_NAME_LEN); ret = service_operation(new_op, - "pvfs2_rename", + "orangefs_rename", get_interruptible_flag(old_dentry->d_inode)); gossip_debug(GOSSIP_NAME_DEBUG, - "pvfs2_rename: got downcall status %d\n", + "orangefs_rename: got downcall status %d\n", ret); if (new_dentry->d_inode) @@ -426,21 +426,21 @@ static int pvfs2_rename(struct inode *old_dir, return ret; } -/* PVFS2 implementation of VFS inode operations for directories */ -struct inode_operations pvfs2_dir_inode_operations = { - .lookup = pvfs2_lookup, - .get_acl = pvfs2_get_acl, - .set_acl = pvfs2_set_acl, - .create = pvfs2_create, - .unlink = pvfs2_unlink, - .symlink = pvfs2_symlink, - .mkdir = pvfs2_mkdir, - .rmdir = pvfs2_unlink, - .rename = pvfs2_rename, - .setattr = pvfs2_setattr, - .getattr = pvfs2_getattr, +/* ORANGEFS implementation of VFS inode operations for directories */ +struct inode_operations orangefs_dir_inode_operations = { + .lookup = orangefs_lookup, + .get_acl = orangefs_get_acl, + .set_acl = orangefs_set_acl, + .create = orangefs_create, + .unlink = orangefs_unlink, + .symlink = orangefs_symlink, + .mkdir = orangefs_mkdir, + .rmdir = orangefs_unlink, + .rename = orangefs_rename, + .setattr = orangefs_setattr, + .getattr = orangefs_getattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, .removexattr = generic_removexattr, - .listxattr = pvfs2_listxattr, + .listxattr = orangefs_listxattr, }; diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 85f611fe0536..5f10ebc83e76 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -20,13 +20,13 @@ extern int cdm_element_count; #define 
ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help" #define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug" #define ORANGEFS_CLIENT_DEBUG_FILE "client-debug" -#define PVFS2_VERBOSE "verbose" -#define PVFS2_ALL "all" +#define ORANGEFS_VERBOSE "verbose" +#define ORANGEFS_ALL "all" /* pvfs2-config.h ***********************************************************/ -#define PVFS2_VERSION_MAJOR 2 -#define PVFS2_VERSION_MINOR 9 -#define PVFS2_VERSION_SUB 0 +#define ORANGEFS_VERSION_MAJOR 2 +#define ORANGEFS_VERSION_MINOR 9 +#define ORANGEFS_VERSION_SUB 0 /* khandle stuff ***********************************************************/ @@ -38,15 +38,15 @@ extern int cdm_element_count; * The kernel module will always use the first four bytes and * the last four bytes as an inum. */ -struct pvfs2_khandle { +struct orangefs_khandle { unsigned char u[16]; } __aligned(8); /* * kernel version of an object ref. */ -struct pvfs2_object_kref { - struct pvfs2_khandle khandle; +struct orangefs_object_kref { + struct orangefs_khandle khandle; __s32 fs_id; __s32 __pad1; }; @@ -55,8 +55,8 @@ struct pvfs2_object_kref { * compare 2 khandles assumes little endian thus from large address to * small address */ -static inline int PVFS_khandle_cmp(const struct pvfs2_khandle *kh1, - const struct pvfs2_khandle *kh2) +static inline int ORANGEFS_khandle_cmp(const struct orangefs_khandle *kh1, + const struct orangefs_khandle *kh2) { int i; @@ -70,7 +70,7 @@ static inline int PVFS_khandle_cmp(const struct pvfs2_khandle *kh1, return 0; } -static inline void PVFS_khandle_to(const struct pvfs2_khandle *kh, +static inline void ORANGEFS_khandle_to(const struct orangefs_khandle *kh, void *p, int size) { @@ -79,7 +79,7 @@ static inline void PVFS_khandle_to(const struct pvfs2_khandle *kh, } -static inline void PVFS_khandle_from(struct pvfs2_khandle *kh, +static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh, void *p, int size) { memset(kh, 0, 16); @@ -88,152 +88,152 @@ static inline void PVFS_khandle_from(struct 
pvfs2_khandle *kh, } /* pvfs2-types.h ************************************************************/ -typedef __u32 PVFS_uid; -typedef __u32 PVFS_gid; -typedef __s32 PVFS_fs_id; -typedef __u32 PVFS_permissions; -typedef __u64 PVFS_time; -typedef __s64 PVFS_size; -typedef __u64 PVFS_flags; -typedef __u64 PVFS_ds_position; -typedef __s32 PVFS_error; -typedef __s64 PVFS_offset; +typedef __u32 ORANGEFS_uid; +typedef __u32 ORANGEFS_gid; +typedef __s32 ORANGEFS_fs_id; +typedef __u32 ORANGEFS_permissions; +typedef __u64 ORANGEFS_time; +typedef __s64 ORANGEFS_size; +typedef __u64 ORANGEFS_flags; +typedef __u64 ORANGEFS_ds_position; +typedef __s32 ORANGEFS_error; +typedef __s64 ORANGEFS_offset; -#define PVFS2_SUPER_MAGIC 0x20030528 +#define ORANGEFS_SUPER_MAGIC 0x20030528 /* - * PVFS2 error codes are a signed 32-bit integer. Error codes are negative, but + * ORANGEFS error codes are a signed 32-bit integer. Error codes are negative, but * the sign is stripped before decoding. */ /* Bit 31 is not used since it is the sign. */ /* - * Bit 30 specifies that this is a PVFS2 error. A PVFS2 error is either an - * encoded errno value or a PVFS2 protocol error. + * Bit 30 specifies that this is a ORANGEFS error. A ORANGEFS error is either an + * encoded errno value or a ORANGEFS protocol error. */ -#define PVFS_ERROR_BIT (1 << 30) +#define ORANGEFS_ERROR_BIT (1 << 30) /* - * Bit 29 specifies that this is a PVFS2 protocol error and not an encoded + * Bit 29 specifies that this is a ORANGEFS protocol error and not an encoded * errno value. */ -#define PVFS_NON_ERRNO_ERROR_BIT (1 << 29) +#define ORANGEFS_NON_ERRNO_ERROR_BIT (1 << 29) /* * Bits 9, 8, and 7 specify the error class, which encodes the section of * server code the error originated in for logging purposes. It is not used * in the kernel except to be masked out. */ -#define PVFS_ERROR_CLASS_BITS 0x380 +#define ORANGEFS_ERROR_CLASS_BITS 0x380 /* Bits 6 - 0 are reserved for the actual error code. 
*/ -#define PVFS_ERROR_NUMBER_BITS 0x7f +#define ORANGEFS_ERROR_NUMBER_BITS 0x7f /* Encoded errno values are decoded by PINT_errno_mapping in pvfs2-utils.c. */ -/* Our own PVFS2 protocol error codes. */ -#define PVFS_ECANCEL (1|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_EDEVINIT (2|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_EDETAIL (3|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_EHOSTNTFD (4|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_EADDRNTFD (5|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_ENORECVR (6|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_ETRYAGAIN (7|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_ENOTPVFS (8|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) -#define PVFS_ESECURITY (9|PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT) +/* Our own ORANGEFS protocol error codes. */ +#define ORANGEFS_ECANCEL (1|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_EDEVINIT (2|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_EDETAIL (3|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_EHOSTNTFD (4|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_EADDRNTFD (5|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_ENORECVR (6|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_ETRYAGAIN (7|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_ENOTPVFS (8|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) +#define ORANGEFS_ESECURITY (9|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) /* permission bits */ -#define PVFS_O_EXECUTE (1 << 0) -#define PVFS_O_WRITE (1 << 1) -#define PVFS_O_READ (1 << 2) -#define PVFS_G_EXECUTE (1 << 3) -#define PVFS_G_WRITE (1 << 4) -#define PVFS_G_READ (1 << 5) -#define PVFS_U_EXECUTE (1 << 6) -#define PVFS_U_WRITE (1 << 7) -#define PVFS_U_READ (1 << 8) -/* no PVFS_U_VTX (sticky bit) */ -#define PVFS_G_SGID (1 << 10) -#define PVFS_U_SUID (1 << 11) +#define ORANGEFS_O_EXECUTE (1 
<< 0) +#define ORANGEFS_O_WRITE (1 << 1) +#define ORANGEFS_O_READ (1 << 2) +#define ORANGEFS_G_EXECUTE (1 << 3) +#define ORANGEFS_G_WRITE (1 << 4) +#define ORANGEFS_G_READ (1 << 5) +#define ORANGEFS_U_EXECUTE (1 << 6) +#define ORANGEFS_U_WRITE (1 << 7) +#define ORANGEFS_U_READ (1 << 8) +/* no ORANGEFS_U_VTX (sticky bit) */ +#define ORANGEFS_G_SGID (1 << 10) +#define ORANGEFS_U_SUID (1 << 11) /* definition taken from stdint.h */ #define INT32_MAX (2147483647) -#define PVFS_ITERATE_START (INT32_MAX - 1) -#define PVFS_ITERATE_END (INT32_MAX - 2) -#define PVFS_ITERATE_NEXT (INT32_MAX - 3) -#define PVFS_READDIR_START PVFS_ITERATE_START -#define PVFS_READDIR_END PVFS_ITERATE_END -#define PVFS_IMMUTABLE_FL FS_IMMUTABLE_FL -#define PVFS_APPEND_FL FS_APPEND_FL -#define PVFS_NOATIME_FL FS_NOATIME_FL -#define PVFS_MIRROR_FL 0x01000000ULL -#define PVFS_O_EXECUTE (1 << 0) -#define PVFS_FS_ID_NULL ((__s32)0) -#define PVFS_ATTR_SYS_UID (1 << 0) -#define PVFS_ATTR_SYS_GID (1 << 1) -#define PVFS_ATTR_SYS_PERM (1 << 2) -#define PVFS_ATTR_SYS_ATIME (1 << 3) -#define PVFS_ATTR_SYS_CTIME (1 << 4) -#define PVFS_ATTR_SYS_MTIME (1 << 5) -#define PVFS_ATTR_SYS_TYPE (1 << 6) -#define PVFS_ATTR_SYS_ATIME_SET (1 << 7) -#define PVFS_ATTR_SYS_MTIME_SET (1 << 8) -#define PVFS_ATTR_SYS_SIZE (1 << 20) -#define PVFS_ATTR_SYS_LNK_TARGET (1 << 24) -#define PVFS_ATTR_SYS_DFILE_COUNT (1 << 25) -#define PVFS_ATTR_SYS_DIRENT_COUNT (1 << 26) -#define PVFS_ATTR_SYS_BLKSIZE (1 << 28) -#define PVFS_ATTR_SYS_MIRROR_COPIES_COUNT (1 << 29) -#define PVFS_ATTR_SYS_COMMON_ALL \ - (PVFS_ATTR_SYS_UID | \ - PVFS_ATTR_SYS_GID | \ - PVFS_ATTR_SYS_PERM | \ - PVFS_ATTR_SYS_ATIME | \ - PVFS_ATTR_SYS_CTIME | \ - PVFS_ATTR_SYS_MTIME | \ - PVFS_ATTR_SYS_TYPE) +#define ORANGEFS_ITERATE_START (INT32_MAX - 1) +#define ORANGEFS_ITERATE_END (INT32_MAX - 2) +#define ORANGEFS_ITERATE_NEXT (INT32_MAX - 3) +#define ORANGEFS_READDIR_START ORANGEFS_ITERATE_START +#define ORANGEFS_READDIR_END ORANGEFS_ITERATE_END +#define 
ORANGEFS_IMMUTABLE_FL FS_IMMUTABLE_FL +#define ORANGEFS_APPEND_FL FS_APPEND_FL +#define ORANGEFS_NOATIME_FL FS_NOATIME_FL +#define ORANGEFS_MIRROR_FL 0x01000000ULL +#define ORANGEFS_O_EXECUTE (1 << 0) +#define ORANGEFS_FS_ID_NULL ((__s32)0) +#define ORANGEFS_ATTR_SYS_UID (1 << 0) +#define ORANGEFS_ATTR_SYS_GID (1 << 1) +#define ORANGEFS_ATTR_SYS_PERM (1 << 2) +#define ORANGEFS_ATTR_SYS_ATIME (1 << 3) +#define ORANGEFS_ATTR_SYS_CTIME (1 << 4) +#define ORANGEFS_ATTR_SYS_MTIME (1 << 5) +#define ORANGEFS_ATTR_SYS_TYPE (1 << 6) +#define ORANGEFS_ATTR_SYS_ATIME_SET (1 << 7) +#define ORANGEFS_ATTR_SYS_MTIME_SET (1 << 8) +#define ORANGEFS_ATTR_SYS_SIZE (1 << 20) +#define ORANGEFS_ATTR_SYS_LNK_TARGET (1 << 24) +#define ORANGEFS_ATTR_SYS_DFILE_COUNT (1 << 25) +#define ORANGEFS_ATTR_SYS_DIRENT_COUNT (1 << 26) +#define ORANGEFS_ATTR_SYS_BLKSIZE (1 << 28) +#define ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT (1 << 29) +#define ORANGEFS_ATTR_SYS_COMMON_ALL \ + (ORANGEFS_ATTR_SYS_UID | \ + ORANGEFS_ATTR_SYS_GID | \ + ORANGEFS_ATTR_SYS_PERM | \ + ORANGEFS_ATTR_SYS_ATIME | \ + ORANGEFS_ATTR_SYS_CTIME | \ + ORANGEFS_ATTR_SYS_MTIME | \ + ORANGEFS_ATTR_SYS_TYPE) -#define PVFS_ATTR_SYS_ALL_SETABLE \ -(PVFS_ATTR_SYS_COMMON_ALL-PVFS_ATTR_SYS_TYPE) +#define ORANGEFS_ATTR_SYS_ALL_SETABLE \ +(ORANGEFS_ATTR_SYS_COMMON_ALL-ORANGEFS_ATTR_SYS_TYPE) -#define PVFS_ATTR_SYS_ALL_NOHINT \ - (PVFS_ATTR_SYS_COMMON_ALL | \ - PVFS_ATTR_SYS_SIZE | \ - PVFS_ATTR_SYS_LNK_TARGET | \ - PVFS_ATTR_SYS_DFILE_COUNT | \ - PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ - PVFS_ATTR_SYS_DIRENT_COUNT | \ - PVFS_ATTR_SYS_BLKSIZE) -#define PVFS_XATTR_REPLACE 0x2 -#define PVFS_XATTR_CREATE 0x1 -#define PVFS_MAX_SERVER_ADDR_LEN 256 -#define PVFS_NAME_MAX 256 +#define ORANGEFS_ATTR_SYS_ALL_NOHINT \ + (ORANGEFS_ATTR_SYS_COMMON_ALL | \ + ORANGEFS_ATTR_SYS_SIZE | \ + ORANGEFS_ATTR_SYS_LNK_TARGET | \ + ORANGEFS_ATTR_SYS_DFILE_COUNT | \ + ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + ORANGEFS_ATTR_SYS_DIRENT_COUNT | \ + 
ORANGEFS_ATTR_SYS_BLKSIZE) +#define ORANGEFS_XATTR_REPLACE 0x2 +#define ORANGEFS_XATTR_CREATE 0x1 +#define ORANGEFS_MAX_SERVER_ADDR_LEN 256 +#define ORANGEFS_NAME_MAX 256 /* * max extended attribute name len as imposed by the VFS and exploited for the * upcall request types. * NOTE: Please retain them as multiples of 8 even if you wish to change them * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit * kernel. Due to implementation within DBPF, this really needs to be - * PVFS_NAME_MAX, which it was the same value as, but no reason to let it + * ORANGEFS_NAME_MAX, which it was the same value as, but no reason to let it * break if that changes in the future. */ -#define PVFS_MAX_XATTR_NAMELEN PVFS_NAME_MAX /* Not the same as +#define ORANGEFS_MAX_XATTR_NAMELEN ORANGEFS_NAME_MAX /* Not the same as * XATTR_NAME_MAX defined * by */ -#define PVFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX +#define ORANGEFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX * defined by */ -#define PVFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX +#define ORANGEFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX * defined by */ /* - * PVFS I/O operation types, used in both system and server interfaces. + * ORANGEFS I/O operation types, used in both system and server interfaces. */ -enum PVFS_io_type { - PVFS_IO_READ = 1, - PVFS_IO_WRITE = 2 +enum ORANGEFS_io_type { + ORANGEFS_IO_READ = 1, + ORANGEFS_IO_WRITE = 2 }; /* @@ -241,21 +241,21 @@ enum PVFS_io_type { * batch and low threshold sizes may need to be modified to reflect this * change. 
*/ -enum pvfs2_ds_type { - PVFS_TYPE_NONE = 0, - PVFS_TYPE_METAFILE = (1 << 0), - PVFS_TYPE_DATAFILE = (1 << 1), - PVFS_TYPE_DIRECTORY = (1 << 2), - PVFS_TYPE_SYMLINK = (1 << 3), - PVFS_TYPE_DIRDATA = (1 << 4), - PVFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ +enum orangefs_ds_type { + ORANGEFS_TYPE_NONE = 0, + ORANGEFS_TYPE_METAFILE = (1 << 0), + ORANGEFS_TYPE_DATAFILE = (1 << 1), + ORANGEFS_TYPE_DIRECTORY = (1 << 2), + ORANGEFS_TYPE_SYMLINK = (1 << 3), + ORANGEFS_TYPE_DIRDATA = (1 << 4), + ORANGEFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ }; /* - * PVFS_certificate simply stores a buffer with the buffer size. + * ORANGEFS_certificate simply stores a buffer with the buffer size. * The buffer can be converted to an OpenSSL X509 struct for use. */ -struct PVFS_certificate { +struct ORANGEFS_certificate { __u32 buf_size; unsigned char *buf; }; @@ -264,7 +264,7 @@ struct PVFS_certificate { * A credential identifies a user and is signed by the client/user * private key. 
*/ -struct PVFS_credential { +struct ORANGEFS_credential { __u32 userid; /* user id */ __u32 num_groups; /* length of group_array */ __u32 *group_array; /* groups for which the user is a member */ @@ -272,25 +272,25 @@ struct PVFS_credential { __u64 timeout; /* seconds after epoch to time out */ __u32 sig_size; /* length of the signature in bytes */ unsigned char *signature; /* digital signature */ - struct PVFS_certificate certificate; /* user certificate buffer */ + struct ORANGEFS_certificate certificate; /* user certificate buffer */ }; -#define extra_size_PVFS_credential (PVFS_REQ_LIMIT_GROUPS * \ +#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS * \ sizeof(__u32) + \ - PVFS_REQ_LIMIT_ISSUER + \ - PVFS_REQ_LIMIT_SIGNATURE + \ - extra_size_PVFS_certificate) + ORANGEFS_REQ_LIMIT_ISSUER + \ + ORANGEFS_REQ_LIMIT_SIGNATURE + \ + extra_size_ORANGEFS_certificate) /* This structure is used by the VFS-client interaction alone */ -struct PVFS_keyval_pair { - char key[PVFS_MAX_XATTR_NAMELEN]; +struct ORANGEFS_keyval_pair { + char key[ORANGEFS_MAX_XATTR_NAMELEN]; __s32 key_sz; /* __s32 for portable, fixed-size structures */ __s32 val_sz; - char val[PVFS_MAX_XATTR_VALUELEN]; + char val[ORANGEFS_MAX_XATTR_VALUELEN]; }; /* pvfs2-sysint.h ***********************************************************/ /* Describes attributes for a file, directory, or symlink. 
*/ -struct PVFS_sys_attr_s { +struct ORANGEFS_sys_attr_s { __u32 owner; __u32 group; __u32 perms; @@ -323,18 +323,18 @@ struct PVFS_sys_attr_s { char *dist_params; __s64 dirent_count; - enum pvfs2_ds_type objtype; + enum orangefs_ds_type objtype; __u64 flags; __u32 mask; __s64 blksize; }; -#define PVFS2_LOOKUP_LINK_NO_FOLLOW 0 -#define PVFS2_LOOKUP_LINK_FOLLOW 1 +#define ORANGEFS_LOOKUP_LINK_NO_FOLLOW 0 +#define ORANGEFS_LOOKUP_LINK_FOLLOW 1 /* pint-dev.h ***************************************************************/ -/* parameter structure used in PVFS_DEV_DEBUG ioctl command */ +/* parameter structure used in ORANGEFS_DEV_DEBUG ioctl command */ struct dev_mask_info_s { enum { KERNEL_MASK, @@ -349,7 +349,7 @@ struct dev_mask2_info_s { }; /* pvfs2-util.h *************************************************************/ -__s32 PVFS_util_translate_mode(int mode); +__s32 ORANGEFS_util_translate_mode(int mode); /* pvfs2-debug.h ************************************************************/ #include "pvfs2-debug.h" @@ -359,9 +359,9 @@ __s32 PVFS_util_translate_mode(int mode); #define lld(x) (long long)(x) /* pint-dev-shared.h ********************************************************/ -#define PVFS_DEV_MAGIC 'k' +#define ORANGEFS_DEV_MAGIC 'k' -#define PVFS2_READDIR_DEFAULT_DESC_COUNT 5 +#define ORANGEFS_READDIR_DEFAULT_DESC_COUNT 5 #define DEV_GET_MAGIC 0x1 #define DEV_GET_MAX_UPSIZE 0x2 @@ -376,39 +376,39 @@ __s32 PVFS_util_translate_mode(int mode); /* supported ioctls, codes are with respect to user-space */ enum { - PVFS_DEV_GET_MAGIC = _IOW(PVFS_DEV_MAGIC, DEV_GET_MAGIC, __s32), - PVFS_DEV_GET_MAX_UPSIZE = - _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32), - PVFS_DEV_GET_MAX_DOWNSIZE = - _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32), - PVFS_DEV_MAP = _IO(PVFS_DEV_MAGIC, DEV_MAP), - PVFS_DEV_REMOUNT_ALL = _IO(PVFS_DEV_MAGIC, DEV_REMOUNT_ALL), - PVFS_DEV_DEBUG = _IOR(PVFS_DEV_MAGIC, DEV_DEBUG, __s32), - PVFS_DEV_UPSTREAM = _IOW(PVFS_DEV_MAGIC, DEV_UPSTREAM, int), - 
PVFS_DEV_CLIENT_MASK = _IOW(PVFS_DEV_MAGIC, + ORANGEFS_DEV_GET_MAGIC = _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAGIC, __s32), + ORANGEFS_DEV_GET_MAX_UPSIZE = + _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32), + ORANGEFS_DEV_GET_MAX_DOWNSIZE = + _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32), + ORANGEFS_DEV_MAP = _IO(ORANGEFS_DEV_MAGIC, DEV_MAP), + ORANGEFS_DEV_REMOUNT_ALL = _IO(ORANGEFS_DEV_MAGIC, DEV_REMOUNT_ALL), + ORANGEFS_DEV_DEBUG = _IOR(ORANGEFS_DEV_MAGIC, DEV_DEBUG, __s32), + ORANGEFS_DEV_UPSTREAM = _IOW(ORANGEFS_DEV_MAGIC, DEV_UPSTREAM, int), + ORANGEFS_DEV_CLIENT_MASK = _IOW(ORANGEFS_DEV_MAGIC, DEV_CLIENT_MASK, struct dev_mask2_info_s), - PVFS_DEV_CLIENT_STRING = _IOW(PVFS_DEV_MAGIC, + ORANGEFS_DEV_CLIENT_STRING = _IOW(ORANGEFS_DEV_MAGIC, DEV_CLIENT_STRING, char *), - PVFS_DEV_MAXNR = DEV_MAX_NR, + ORANGEFS_DEV_MAXNR = DEV_MAX_NR, }; /* * version number for use in communicating between kernel space and user * space. Zero signifies the upstream version of the kernel module. */ -#define PVFS_KERNEL_PROTO_VERSION 0 +#define ORANGEFS_KERNEL_PROTO_VERSION 0 /* - * describes memory regions to map in the PVFS_DEV_MAP ioctl. + * describes memory regions to map in the ORANGEFS_DEV_MAP ioctl. * NOTE: See devpvfs2-req.c for 32 bit compat structure. * Since this structure has a variable-sized layout that is different * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout * on such systems before servicing ioctl calls from user-space binaries * that may be 32 bit! 
*/ -struct PVFS_dev_map_desc { +struct ORANGEFS_dev_map_desc { void *ptr; __s32 total_size; __s32 size; diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/pvfs2-bufmap.c index c7b0f3560734..345287e871b1 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/pvfs2-bufmap.c @@ -7,9 +7,9 @@ #include "pvfs2-kernel.h" #include "pvfs2-bufmap.h" -DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq); +DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq); -static struct pvfs2_bufmap { +static struct orangefs_bufmap { atomic_t refcnt; int desc_size; @@ -19,21 +19,21 @@ static struct pvfs2_bufmap { int page_count; struct page **page_array; - struct pvfs_bufmap_desc *desc_array; + struct orangefs_bufmap_desc *desc_array; /* array to track usage of buffer descriptors */ int *buffer_index_array; spinlock_t buffer_index_lock; /* array to track usage of buffer descriptors for readdir */ - int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT]; + int readdir_index_array[ORANGEFS_READDIR_DEFAULT_DESC_COUNT]; spinlock_t readdir_index_lock; -} *__pvfs2_bufmap; +} *__orangefs_bufmap; -static DEFINE_SPINLOCK(pvfs2_bufmap_lock); +static DEFINE_SPINLOCK(orangefs_bufmap_lock); static void -pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap) +orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap) { int i; @@ -42,7 +42,7 @@ pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap) } static void -pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap) +orangefs_bufmap_free(struct orangefs_bufmap *bufmap) { kfree(bufmap->page_array); kfree(bufmap->desc_array); @@ -50,45 +50,45 @@ pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap) kfree(bufmap); } -struct pvfs2_bufmap *pvfs2_bufmap_ref(void) +struct orangefs_bufmap *orangefs_bufmap_ref(void) { - struct pvfs2_bufmap *bufmap = NULL; + struct orangefs_bufmap *bufmap = NULL; - spin_lock(&pvfs2_bufmap_lock); - if (__pvfs2_bufmap) { - bufmap = __pvfs2_bufmap; + spin_lock(&orangefs_bufmap_lock); + if (__orangefs_bufmap) { + bufmap = __orangefs_bufmap; 
atomic_inc(&bufmap->refcnt); } - spin_unlock(&pvfs2_bufmap_lock); + spin_unlock(&orangefs_bufmap_lock); return bufmap; } -void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap) +void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap) { - if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) { - __pvfs2_bufmap = NULL; - spin_unlock(&pvfs2_bufmap_lock); + if (atomic_dec_and_lock(&bufmap->refcnt, &orangefs_bufmap_lock)) { + __orangefs_bufmap = NULL; + spin_unlock(&orangefs_bufmap_lock); - pvfs2_bufmap_unmap(bufmap); - pvfs2_bufmap_free(bufmap); + orangefs_bufmap_unmap(bufmap); + orangefs_bufmap_free(bufmap); } } -inline int pvfs_bufmap_size_query(void) +inline int orangefs_bufmap_size_query(void) { - struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); int size = bufmap ? bufmap->desc_size : 0; - pvfs2_bufmap_unref(bufmap); + orangefs_bufmap_unref(bufmap); return size; } -inline int pvfs_bufmap_shift_query(void) +inline int orangefs_bufmap_shift_query(void) { - struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); int shift = bufmap ? bufmap->desc_shift : 0; - pvfs2_bufmap_unref(bufmap); + orangefs_bufmap_unref(bufmap); return shift; } @@ -105,14 +105,14 @@ static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq); */ int get_bufmap_init(void) { - return __pvfs2_bufmap ? 1 : 0; + return __orangefs_bufmap ? 
1 : 0; } -static struct pvfs2_bufmap * -pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc) +static struct orangefs_bufmap * +orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) { - struct pvfs2_bufmap *bufmap; + struct orangefs_bufmap *bufmap; bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL); if (!bufmap) @@ -128,17 +128,17 @@ pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc) bufmap->buffer_index_array = kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL); if (!bufmap->buffer_index_array) { - gossip_err("pvfs2: could not allocate %d buffer indices\n", + gossip_err("orangefs: could not allocate %d buffer indices\n", bufmap->desc_count); goto out_free_bufmap; } spin_lock_init(&bufmap->readdir_index_lock); bufmap->desc_array = - kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc), + kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc), GFP_KERNEL); if (!bufmap->desc_array) { - gossip_err("pvfs2: could not allocate %d descriptors\n", + gossip_err("orangefs: could not allocate %d descriptors\n", bufmap->desc_count); goto out_free_index_array; } @@ -164,8 +164,8 @@ out: } static int -pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap, - struct PVFS_dev_map_desc *user_desc) +orangefs_bufmap_map(struct orangefs_bufmap *bufmap, + struct ORANGEFS_dev_map_desc *user_desc) { int pages_per_desc = bufmap->desc_size / PAGE_SIZE; int offset = 0, ret, i; @@ -178,7 +178,7 @@ pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap, return ret; if (ret != bufmap->page_count) { - gossip_err("pvfs2 error: asked for %d pages, only got %d.\n", + gossip_err("orangefs error: asked for %d pages, only got %d.\n", bufmap->page_count, ret); for (i = 0; i < ret; i++) { @@ -210,19 +210,19 @@ pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap, } /* - * pvfs_bufmap_initialize() + * orangefs_bufmap_initialize() * * initializes the mapped buffer interface * * returns 0 on success, -errno on failure */ -int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) +int 
orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc) { - struct pvfs2_bufmap *bufmap; + struct orangefs_bufmap *bufmap; int ret = -EINVAL; gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs_bufmap_initialize: called (ptr (" + "orangefs_bufmap_initialize: called (ptr (" "%p) sz (%d) cnt(%d).\n", user_desc->ptr, user_desc->size, @@ -234,21 +234,21 @@ int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) */ if (PAGE_ALIGN((unsigned long)user_desc->ptr) != (unsigned long)user_desc->ptr) { - gossip_err("pvfs2 error: memory alignment (front). %p\n", + gossip_err("orangefs error: memory alignment (front). %p\n", user_desc->ptr); goto out; } if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size)) != (unsigned long)(user_desc->ptr + user_desc->total_size)) { - gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n", + gossip_err("orangefs error: memory alignment (back).(%p + %d)\n", user_desc->ptr, user_desc->total_size); goto out; } if (user_desc->total_size != (user_desc->size * user_desc->count)) { - gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n", + gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n", user_desc->total_size, user_desc->size, user_desc->count); @@ -256,33 +256,33 @@ int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) } if ((user_desc->size % PAGE_SIZE) != 0) { - gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n", + gossip_err("orangefs error: bufmap size not page size divisible (%d).\n", user_desc->size); goto out; } ret = -ENOMEM; - bufmap = pvfs2_bufmap_alloc(user_desc); + bufmap = orangefs_bufmap_alloc(user_desc); if (!bufmap) goto out; - ret = pvfs2_bufmap_map(bufmap, user_desc); + ret = orangefs_bufmap_map(bufmap, user_desc); if (ret) goto out_free_bufmap; - spin_lock(&pvfs2_bufmap_lock); - if (__pvfs2_bufmap) { - spin_unlock(&pvfs2_bufmap_lock); - gossip_err("pvfs2: error: bufmap already initialized.\n"); + 
spin_lock(&orangefs_bufmap_lock); + if (__orangefs_bufmap) { + spin_unlock(&orangefs_bufmap_lock); + gossip_err("orangefs: error: bufmap already initialized.\n"); ret = -EALREADY; goto out_unmap_bufmap; } - __pvfs2_bufmap = bufmap; - spin_unlock(&pvfs2_bufmap_lock); + __orangefs_bufmap = bufmap; + spin_unlock(&orangefs_bufmap_lock); /* - * If there are operations in pvfs2_bufmap_init_waitq, wake them up. + * If there are operations in orangefs_bufmap_init_waitq, wake them up. * This scenario occurs when the client-core is restarted and I/O * requests in the in-progress or waiting tables are restarted. I/O * requests cannot be restarted until the shared memory system is @@ -291,35 +291,35 @@ int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc) * are also on a timer, so they don't wait forever just in case the * client-core doesn't come back up. */ - wake_up_interruptible(&pvfs2_bufmap_init_waitq); + wake_up_interruptible(&orangefs_bufmap_init_waitq); gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs_bufmap_initialize: exiting normally\n"); + "orangefs_bufmap_initialize: exiting normally\n"); return 0; out_unmap_bufmap: - pvfs2_bufmap_unmap(bufmap); + orangefs_bufmap_unmap(bufmap); out_free_bufmap: - pvfs2_bufmap_free(bufmap); + orangefs_bufmap_free(bufmap); out: return ret; } /* - * pvfs_bufmap_finalize() + * orangefs_bufmap_finalize() * * shuts down the mapped buffer interface and releases any resources * associated with it * * no return value */ -void pvfs_bufmap_finalize(void) +void orangefs_bufmap_finalize(void) { - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n"); - BUG_ON(!__pvfs2_bufmap); - pvfs2_bufmap_unref(__pvfs2_bufmap); + gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n"); + BUG_ON(!__orangefs_bufmap); + orangefs_bufmap_unref(__orangefs_bufmap); gossip_debug(GOSSIP_BUFMAP_DEBUG, - "pvfs2_bufmap_finalize: exiting normally\n"); + "orangefs_bufmap_finalize: exiting normally\n"); } struct slot_args { @@ -377,7 
+377,7 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) continue; } - gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n", + gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs: %s interrupted.\n", __func__); ret = -EINTR; break; @@ -406,21 +406,21 @@ static void put_back_slot(struct slot_args *slargs, int buffer_index) } /* - * pvfs_bufmap_get() + * orangefs_bufmap_get() * * gets a free mapped buffer descriptor, will sleep until one becomes * available if necessary * * returns 0 on success, -errno on failure */ -int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index) +int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index) { - struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); struct slot_args slargs; int ret; if (!bufmap) { - gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n"); + gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n"); return -EIO; } @@ -430,19 +430,19 @@ int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index) slargs.slot_wq = &bufmap_waitq; ret = wait_for_a_slot(&slargs, buffer_index); if (ret) - pvfs2_bufmap_unref(bufmap); + orangefs_bufmap_unref(bufmap); *mapp = bufmap; return ret; } /* - * pvfs_bufmap_put() + * orangefs_bufmap_put() * * returns a mapped buffer descriptor to the collection * * no return value */ -void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index) +void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index) { struct slot_args slargs; @@ -451,7 +451,7 @@ void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index) slargs.slot_lock = &bufmap->buffer_index_lock; slargs.slot_wq = &bufmap_waitq; put_back_slot(&slargs, buffer_index); - pvfs2_bufmap_unref(bufmap); + orangefs_bufmap_unref(bufmap); } /* @@ -465,46 +465,46 @@ void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index) * * returns 0 on success, -errno on 
failure */ -int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index) +int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) { - struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref(); + struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); struct slot_args slargs; int ret; if (!bufmap) { - gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n"); + gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n"); return -EIO; } - slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT; + slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT; slargs.slot_array = bufmap->readdir_index_array; slargs.slot_lock = &bufmap->readdir_index_lock; slargs.slot_wq = &readdir_waitq; ret = wait_for_a_slot(&slargs, buffer_index); if (ret) - pvfs2_bufmap_unref(bufmap); + orangefs_bufmap_unref(bufmap); *mapp = bufmap; return ret; } -void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index) +void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index) { struct slot_args slargs; - slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT; + slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT; slargs.slot_array = bufmap->readdir_index_array; slargs.slot_lock = &bufmap->readdir_index_lock; slargs.slot_wq = &readdir_waitq; put_back_slot(&slargs, buffer_index); - pvfs2_bufmap_unref(bufmap); + orangefs_bufmap_unref(bufmap); } -int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, +int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, int buffer_index, size_t size) { - struct pvfs_bufmap_desc *to = &bufmap->desc_array[buffer_index]; + struct orangefs_bufmap_desc *to = &bufmap->desc_array[buffer_index]; int i; gossip_debug(GOSSIP_BUFMAP_DEBUG, @@ -531,12 +531,12 @@ int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, * a file being read. 
* */ -int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, +int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, int buffer_index, size_t size) { - struct pvfs_bufmap_desc *from = &bufmap->desc_array[buffer_index]; + struct orangefs_bufmap_desc *from = &bufmap->desc_array[buffer_index]; int i; gossip_debug(GOSSIP_BUFMAP_DEBUG, diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/pvfs2-bufmap.h index d1aedb52a877..91d1755c231a 100644 --- a/fs/orangefs/pvfs2-bufmap.h +++ b/fs/orangefs/pvfs2-bufmap.h @@ -4,59 +4,59 @@ * See COPYING in top-level directory. */ -#ifndef __PVFS2_BUFMAP_H -#define __PVFS2_BUFMAP_H +#ifndef __ORANGEFS_BUFMAP_H +#define __ORANGEFS_BUFMAP_H /* used to describe mapped buffers */ -struct pvfs_bufmap_desc { +struct orangefs_bufmap_desc { void *uaddr; /* user space address pointer */ struct page **page_array; /* array of mapped pages */ int array_count; /* size of above arrays */ struct list_head list_link; }; -struct pvfs2_bufmap; +struct orangefs_bufmap; -struct pvfs2_bufmap *pvfs2_bufmap_ref(void); -void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap); +struct orangefs_bufmap *orangefs_bufmap_ref(void); +void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap); /* - * pvfs_bufmap_size_query is now an inline function because buffer + * orangefs_bufmap_size_query is now an inline function because buffer * sizes are not hardcoded */ -int pvfs_bufmap_size_query(void); +int orangefs_bufmap_size_query(void); -int pvfs_bufmap_shift_query(void); +int orangefs_bufmap_shift_query(void); -int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc); +int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc); int get_bufmap_init(void); -void pvfs_bufmap_finalize(void); +void orangefs_bufmap_finalize(void); -int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index); +int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index); -void pvfs_bufmap_put(struct pvfs2_bufmap 
*bufmap, int buffer_index); +void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index); -int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index); +int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index); -void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index); +void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index); -int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap, +int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, int buffer_index, size_t size); -int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap, +int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, int buffer_index, size_t size); -size_t pvfs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, +size_t orangefs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, struct iovec *iovec, unsigned long nr_segs, - struct pvfs2_bufmap *bufmap, + struct orangefs_bufmap *bufmap, int buffer_index, size_t bytes_to_be_copied); -#endif /* __PVFS2_BUFMAP_H */ +#endif /* __ORANGEFS_BUFMAP_H */ diff --git a/fs/orangefs/pvfs2-cache.c b/fs/orangefs/pvfs2-cache.c index f982616a4349..a224831770f4 100644 --- a/fs/orangefs/pvfs2-cache.c +++ b/fs/orangefs/pvfs2-cache.c @@ -11,27 +11,27 @@ static __u64 next_tag_value; static DEFINE_SPINLOCK(next_tag_value_lock); -/* the pvfs2 memory caches */ +/* the orangefs memory caches */ -/* a cache for pvfs2 upcall/downcall operations */ +/* a cache for orangefs upcall/downcall operations */ static struct kmem_cache *op_cache; /* a cache for device (/dev/pvfs2-req) communication */ static struct kmem_cache *dev_req_cache; -/* a cache for pvfs2_kiocb objects (i.e pvfs2 iocb structures ) */ -static struct kmem_cache *pvfs2_kiocb_cache; +/* a cache for orangefs_kiocb objects (i.e orangefs iocb structures ) */ +static struct kmem_cache *orangefs_kiocb_cache; int op_cache_initialize(void) { - op_cache = 
kmem_cache_create("pvfs2_op_cache", - sizeof(struct pvfs2_kernel_op_s), + op_cache = kmem_cache_create("orangefs_op_cache", + sizeof(struct orangefs_kernel_op_s), 0, - PVFS2_CACHE_CREATE_FLAGS, + ORANGEFS_CACHE_CREATE_FLAGS, NULL); if (!op_cache) { - gossip_err("Cannot create pvfs2_op_cache\n"); + gossip_err("Cannot create orangefs_op_cache\n"); return -ENOMEM; } @@ -48,72 +48,72 @@ int op_cache_finalize(void) return 0; } -char *get_opname_string(struct pvfs2_kernel_op_s *new_op) +char *get_opname_string(struct orangefs_kernel_op_s *new_op) { if (new_op) { __s32 type = new_op->upcall.type; - if (type == PVFS2_VFS_OP_FILE_IO) + if (type == ORANGEFS_VFS_OP_FILE_IO) return "OP_FILE_IO"; - else if (type == PVFS2_VFS_OP_LOOKUP) + else if (type == ORANGEFS_VFS_OP_LOOKUP) return "OP_LOOKUP"; - else if (type == PVFS2_VFS_OP_CREATE) + else if (type == ORANGEFS_VFS_OP_CREATE) return "OP_CREATE"; - else if (type == PVFS2_VFS_OP_GETATTR) + else if (type == ORANGEFS_VFS_OP_GETATTR) return "OP_GETATTR"; - else if (type == PVFS2_VFS_OP_REMOVE) + else if (type == ORANGEFS_VFS_OP_REMOVE) return "OP_REMOVE"; - else if (type == PVFS2_VFS_OP_MKDIR) + else if (type == ORANGEFS_VFS_OP_MKDIR) return "OP_MKDIR"; - else if (type == PVFS2_VFS_OP_READDIR) + else if (type == ORANGEFS_VFS_OP_READDIR) return "OP_READDIR"; - else if (type == PVFS2_VFS_OP_READDIRPLUS) + else if (type == ORANGEFS_VFS_OP_READDIRPLUS) return "OP_READDIRPLUS"; - else if (type == PVFS2_VFS_OP_SETATTR) + else if (type == ORANGEFS_VFS_OP_SETATTR) return "OP_SETATTR"; - else if (type == PVFS2_VFS_OP_SYMLINK) + else if (type == ORANGEFS_VFS_OP_SYMLINK) return "OP_SYMLINK"; - else if (type == PVFS2_VFS_OP_RENAME) + else if (type == ORANGEFS_VFS_OP_RENAME) return "OP_RENAME"; - else if (type == PVFS2_VFS_OP_STATFS) + else if (type == ORANGEFS_VFS_OP_STATFS) return "OP_STATFS"; - else if (type == PVFS2_VFS_OP_TRUNCATE) + else if (type == ORANGEFS_VFS_OP_TRUNCATE) return "OP_TRUNCATE"; - else if (type == 
PVFS2_VFS_OP_MMAP_RA_FLUSH) + else if (type == ORANGEFS_VFS_OP_MMAP_RA_FLUSH) return "OP_MMAP_RA_FLUSH"; - else if (type == PVFS2_VFS_OP_FS_MOUNT) + else if (type == ORANGEFS_VFS_OP_FS_MOUNT) return "OP_FS_MOUNT"; - else if (type == PVFS2_VFS_OP_FS_UMOUNT) + else if (type == ORANGEFS_VFS_OP_FS_UMOUNT) return "OP_FS_UMOUNT"; - else if (type == PVFS2_VFS_OP_GETXATTR) + else if (type == ORANGEFS_VFS_OP_GETXATTR) return "OP_GETXATTR"; - else if (type == PVFS2_VFS_OP_SETXATTR) + else if (type == ORANGEFS_VFS_OP_SETXATTR) return "OP_SETXATTR"; - else if (type == PVFS2_VFS_OP_LISTXATTR) + else if (type == ORANGEFS_VFS_OP_LISTXATTR) return "OP_LISTXATTR"; - else if (type == PVFS2_VFS_OP_REMOVEXATTR) + else if (type == ORANGEFS_VFS_OP_REMOVEXATTR) return "OP_REMOVEXATTR"; - else if (type == PVFS2_VFS_OP_PARAM) + else if (type == ORANGEFS_VFS_OP_PARAM) return "OP_PARAM"; - else if (type == PVFS2_VFS_OP_PERF_COUNT) + else if (type == ORANGEFS_VFS_OP_PERF_COUNT) return "OP_PERF_COUNT"; - else if (type == PVFS2_VFS_OP_CANCEL) + else if (type == ORANGEFS_VFS_OP_CANCEL) return "OP_CANCEL"; - else if (type == PVFS2_VFS_OP_FSYNC) + else if (type == ORANGEFS_VFS_OP_FSYNC) return "OP_FSYNC"; - else if (type == PVFS2_VFS_OP_FSKEY) + else if (type == ORANGEFS_VFS_OP_FSKEY) return "OP_FSKEY"; } return "OP_UNKNOWN?"; } -struct pvfs2_kernel_op_s *op_alloc(__s32 type) +struct orangefs_kernel_op_s *op_alloc(__s32 type) { - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; - new_op = kmem_cache_alloc(op_cache, PVFS2_CACHE_ALLOC_FLAGS); + new_op = kmem_cache_alloc(op_cache, ORANGEFS_CACHE_ALLOC_FLAGS); if (new_op) { - memset(new_op, 0, sizeof(struct pvfs2_kernel_op_s)); + memset(new_op, 0, sizeof(struct orangefs_kernel_op_s)); INIT_LIST_HEAD(&new_op->list); spin_lock_init(&new_op->lock); @@ -122,7 +122,7 @@ struct pvfs2_kernel_op_s *op_alloc(__s32 type) init_waitqueue_head(&new_op->io_completion_waitq); atomic_set(&new_op->aio_ref_count, 0); - 
pvfs2_op_initialize(new_op); + orangefs_op_initialize(new_op); /* initialize the op specific tag and upcall credentials */ spin_lock(&next_tag_value_lock); @@ -149,15 +149,15 @@ struct pvfs2_kernel_op_s *op_alloc(__s32 type) return new_op; } -void op_release(struct pvfs2_kernel_op_s *pvfs2_op) +void op_release(struct orangefs_kernel_op_s *orangefs_op) { - if (pvfs2_op) { + if (orangefs_op) { gossip_debug(GOSSIP_CACHE_DEBUG, "Releasing OP (%p: %llu)\n", - pvfs2_op, - llu(pvfs2_op->tag)); - pvfs2_op_initialize(pvfs2_op); - kmem_cache_free(op_cache, pvfs2_op); + orangefs_op, + llu(orangefs_op->tag)); + orangefs_op_initialize(orangefs_op); + kmem_cache_free(op_cache, orangefs_op); } else { gossip_err("NULL pointer in op_release\n"); } @@ -165,14 +165,14 @@ void op_release(struct pvfs2_kernel_op_s *pvfs2_op) int dev_req_cache_initialize(void) { - dev_req_cache = kmem_cache_create("pvfs2_devreqcache", + dev_req_cache = kmem_cache_create("orangefs_devreqcache", MAX_ALIGNED_DEV_REQ_DOWNSIZE, 0, - PVFS2_CACHE_CREATE_FLAGS, + ORANGEFS_CACHE_CREATE_FLAGS, NULL); if (!dev_req_cache) { - gossip_err("Cannot create pvfs2_dev_req_cache\n"); + gossip_err("Cannot create orangefs_dev_req_cache\n"); return -ENOMEM; } return 0; @@ -188,7 +188,7 @@ void *dev_req_alloc(void) { void *buffer; - buffer = kmem_cache_alloc(dev_req_cache, PVFS2_CACHE_ALLOC_FLAGS); + buffer = kmem_cache_alloc(dev_req_cache, ORANGEFS_CACHE_ALLOC_FLAGS); if (buffer == NULL) gossip_err("Failed to allocate from dev_req_cache\n"); else @@ -206,14 +206,14 @@ void dev_req_release(void *buffer) int kiocb_cache_initialize(void) { - pvfs2_kiocb_cache = kmem_cache_create("pvfs2_kiocbcache", - sizeof(struct pvfs2_kiocb_s), + orangefs_kiocb_cache = kmem_cache_create("orangefs_kiocbcache", + sizeof(struct orangefs_kiocb_s), 0, - PVFS2_CACHE_CREATE_FLAGS, + ORANGEFS_CACHE_CREATE_FLAGS, NULL); - if (!pvfs2_kiocb_cache) { - gossip_err("Cannot create pvfs2_kiocb_cache!\n"); + if (!orangefs_kiocb_cache) { + gossip_err("Cannot 
create orangefs_kiocb_cache!\n"); return -ENOMEM; } return 0; @@ -221,26 +221,26 @@ int kiocb_cache_initialize(void) int kiocb_cache_finalize(void) { - kmem_cache_destroy(pvfs2_kiocb_cache); + kmem_cache_destroy(orangefs_kiocb_cache); return 0; } -struct pvfs2_kiocb_s *kiocb_alloc(void) +struct orangefs_kiocb_s *kiocb_alloc(void) { - struct pvfs2_kiocb_s *x = NULL; + struct orangefs_kiocb_s *x = NULL; - x = kmem_cache_alloc(pvfs2_kiocb_cache, PVFS2_CACHE_ALLOC_FLAGS); + x = kmem_cache_alloc(orangefs_kiocb_cache, ORANGEFS_CACHE_ALLOC_FLAGS); if (x == NULL) gossip_err("kiocb_alloc: kmem_cache_alloc failed!\n"); else - memset(x, 0, sizeof(struct pvfs2_kiocb_s)); + memset(x, 0, sizeof(struct orangefs_kiocb_s)); return x; } -void kiocb_release(struct pvfs2_kiocb_s *x) +void kiocb_release(struct orangefs_kiocb_s *x) { if (x) - kmem_cache_free(pvfs2_kiocb_cache, x); + kmem_cache_free(orangefs_kiocb_cache, x); else gossip_err("kiocb_release: kmem_cache_free NULL pointer!\n"); } diff --git a/fs/orangefs/pvfs2-debug.h b/fs/orangefs/pvfs2-debug.h index fd71d6c84cf6..e6b4baa5e8fb 100644 --- a/fs/orangefs/pvfs2-debug.h +++ b/fs/orangefs/pvfs2-debug.h @@ -5,12 +5,12 @@ */ /* This file just defines debugging masks to be used with the gossip - * logging utility. All debugging masks for PVFS2 are kept here to make + * logging utility. All debugging masks for ORANGEFS are kept here to make * sure we don't have collisions. 
*/ -#ifndef __PVFS2_DEBUG_H -#define __PVFS2_DEBUG_H +#ifndef __ORANGEFS_DEBUG_H +#define __ORANGEFS_DEBUG_H #ifdef __KERNEL__ #include @@ -90,7 +90,7 @@ GOSSIP_BMI_DEBUG_MX + \ GOSSIP_BMI_DEBUG_PORTALS)) -const char *PVFS_debug_get_next_debug_keyword(int position); +const char *ORANGEFS_debug_get_next_debug_keyword(int position); #define GOSSIP_SUPER_DEBUG ((__u64)1 << 0) #define GOSSIP_INODE_DEBUG ((__u64)1 << 1) @@ -113,10 +113,10 @@ const char *PVFS_debug_get_next_debug_keyword(int position); #define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1) /*function prototypes*/ -__u64 PVFS_kmod_eventlog_to_mask(const char *event_logging); -__u64 PVFS_debug_eventlog_to_mask(const char *event_logging); -char *PVFS_debug_mask_to_eventlog(__u64 mask); -char *PVFS_kmod_mask_to_eventlog(__u64 mask); +__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging); +__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging); +char *ORANGEFS_debug_mask_to_eventlog(__u64 mask); +char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask); /* a private internal type */ struct __keyword_mask_s { @@ -289,4 +289,4 @@ static const int num_kmod_keyword_mask_map = (int) static const int num_keyword_mask_map = (int) (sizeof(s_keyword_mask_map) / sizeof(struct __keyword_mask_s)); -#endif /* __PVFS2_DEBUG_H */ +#endif /* __ORANGEFS_DEBUG_H */ diff --git a/fs/orangefs/pvfs2-debugfs.c b/fs/orangefs/pvfs2-debugfs.c index ba5bfef7a3f3..315dc538b723 100644 --- a/fs/orangefs/pvfs2-debugfs.c +++ b/fs/orangefs/pvfs2-debugfs.c @@ -95,7 +95,7 @@ static const struct file_operations kernel_debug_fops = { * initialize kmod debug operations, create orangefs debugfs dir and * ORANGEFS_KMOD_DEBUG_HELP_FILE. 
*/ -int pvfs2_debugfs_init(void) +int orangefs_debugfs_init(void) { int rc = -ENOMEM; @@ -117,12 +117,12 @@ int pvfs2_debugfs_init(void) out: if (rc) - pvfs2_debugfs_cleanup(); + orangefs_debugfs_cleanup(); return rc; } -void pvfs2_debugfs_cleanup(void) +void orangefs_debugfs_cleanup(void) { debugfs_remove_recursive(debug_dir); } @@ -196,7 +196,7 @@ static int help_show(struct seq_file *m, void *v) /* * initialize the kernel-debug file. */ -int pvfs2_kernel_debug_init(void) +int orangefs_kernel_debug_init(void) { int rc = -ENOMEM; @@ -205,11 +205,11 @@ int pvfs2_kernel_debug_init(void) gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); - k_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); if (!k_buffer) goto out; - if (strlen(kernel_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) { + if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) { strcpy(k_buffer, kernel_debug_string); strcat(k_buffer, "\n"); } else { @@ -233,7 +233,7 @@ int pvfs2_kernel_debug_init(void) out: if (rc) - pvfs2_debugfs_cleanup(); + orangefs_debugfs_cleanup(); gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); return rc; @@ -242,7 +242,7 @@ out: /* * initialize the client-debug file. 
*/ -int pvfs2_client_debug_init(void) +int orangefs_client_debug_init(void) { int rc = -ENOMEM; @@ -250,11 +250,11 @@ int pvfs2_client_debug_init(void) gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); - c_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); if (!c_buffer) goto out; - if (strlen(client_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) { + if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) { strcpy(c_buffer, client_debug_string); strcat(c_buffer, "\n"); } else { @@ -278,7 +278,7 @@ int pvfs2_client_debug_init(void) out: if (rc) - pvfs2_debugfs_cleanup(); + orangefs_debugfs_cleanup(); gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); return rc; @@ -320,7 +320,7 @@ static ssize_t orangefs_debug_read(struct file *file, gossip_debug(GOSSIP_DEBUGFS_DEBUG, "orangefs_debug_read: start\n"); - buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + buf = kmalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); if (!buf) goto out; @@ -349,7 +349,7 @@ static ssize_t orangefs_debug_write(struct file *file, int rc = -EFAULT; size_t silly = 0; char *debug_string; - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; struct client_debug_mask c_mask = { NULL, 0, 0 }; gossip_debug(GOSSIP_DEBUGFS_DEBUG, @@ -360,15 +360,15 @@ static ssize_t orangefs_debug_write(struct file *file, * Thwart users who try to jamb a ridiculous number * of bytes into the debug file... 
*/ - if (count > PVFS2_MAX_DEBUG_STRING_LEN + 1) { + if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) { silly = count; - count = PVFS2_MAX_DEBUG_STRING_LEN + 1; + count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1; } - buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + buf = kmalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); if (!buf) goto out; - memset(buf, 0, PVFS2_MAX_DEBUG_STRING_LEN); + memset(buf, 0, ORANGEFS_MAX_DEBUG_STRING_LEN); if (copy_from_user(buf, ubuf, count - 1)) { gossip_debug(GOSSIP_DEBUGFS_DEBUG, @@ -407,18 +407,18 @@ static ssize_t orangefs_debug_write(struct file *file, debug_mask_to_string(&c_mask, 1); debug_string = client_debug_string; - new_op = op_alloc(PVFS2_VFS_OP_PARAM); + new_op = op_alloc(ORANGEFS_VFS_OP_PARAM); if (!new_op) { pr_info("%s: op_alloc failed!\n", __func__); goto out; } new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES; - new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET; + ORANGEFS_PARAM_REQUEST_OP_TWO_MASK_VALUES; + new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_SET; memset(new_op->upcall.req.param.s_value, 0, - PVFS2_MAX_DEBUG_STRING_LEN); + ORANGEFS_MAX_DEBUG_STRING_LEN); sprintf(new_op->upcall.req.param.s_value, "%llx %llx\n", c_mask.mask1, @@ -426,8 +426,8 @@ static ssize_t orangefs_debug_write(struct file *file, /* service_operation returns 0 on success... 
*/ rc = service_operation(new_op, - "pvfs2_param", - PVFS2_OP_INTERRUPTIBLE); + "orangefs_param", + ORANGEFS_OP_INTERRUPTIBLE); if (rc) gossip_debug(GOSSIP_DEBUGFS_DEBUG, @@ -439,7 +439,7 @@ static ssize_t orangefs_debug_write(struct file *file, } mutex_lock(&orangefs_debug_lock); - memset(file->f_inode->i_private, 0, PVFS2_MAX_DEBUG_STRING_LEN); + memset(file->f_inode->i_private, 0, ORANGEFS_MAX_DEBUG_STRING_LEN); sprintf((char *)file->f_inode->i_private, "%s\n", debug_string); mutex_unlock(&orangefs_debug_lock); diff --git a/fs/orangefs/pvfs2-debugfs.h b/fs/orangefs/pvfs2-debugfs.h index a66b7d08c14d..e4828c0e3ef9 100644 --- a/fs/orangefs/pvfs2-debugfs.h +++ b/fs/orangefs/pvfs2-debugfs.h @@ -1,3 +1,3 @@ -int pvfs2_debugfs_init(void); -int pvfs2_kernel_debug_init(void); -void pvfs2_debugfs_cleanup(void); +int orangefs_debugfs_init(void); +int orangefs_kernel_debug_init(void); +void orangefs_debugfs_cleanup(void); diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/pvfs2-dev-proto.h index 71ab56df4ad7..dc1951dd7045 100644 --- a/fs/orangefs/pvfs2-dev-proto.h +++ b/fs/orangefs/pvfs2-dev-proto.h @@ -4,8 +4,8 @@ * See COPYING in top-level directory. 
*/ -#ifndef _PVFS2_DEV_PROTO_H -#define _PVFS2_DEV_PROTO_H +#ifndef _ORANGEFS_DEV_PROTO_H +#define _ORANGEFS_DEV_PROTO_H /* * types and constants shared between user space and kernel space for @@ -13,46 +13,46 @@ */ /* - * valid pvfs2 kernel operation types + * valid orangefs kernel operation types */ -#define PVFS2_VFS_OP_INVALID 0xFF000000 -#define PVFS2_VFS_OP_FILE_IO 0xFF000001 -#define PVFS2_VFS_OP_LOOKUP 0xFF000002 -#define PVFS2_VFS_OP_CREATE 0xFF000003 -#define PVFS2_VFS_OP_GETATTR 0xFF000004 -#define PVFS2_VFS_OP_REMOVE 0xFF000005 -#define PVFS2_VFS_OP_MKDIR 0xFF000006 -#define PVFS2_VFS_OP_READDIR 0xFF000007 -#define PVFS2_VFS_OP_SETATTR 0xFF000008 -#define PVFS2_VFS_OP_SYMLINK 0xFF000009 -#define PVFS2_VFS_OP_RENAME 0xFF00000A -#define PVFS2_VFS_OP_STATFS 0xFF00000B -#define PVFS2_VFS_OP_TRUNCATE 0xFF00000C -#define PVFS2_VFS_OP_MMAP_RA_FLUSH 0xFF00000D -#define PVFS2_VFS_OP_FS_MOUNT 0xFF00000E -#define PVFS2_VFS_OP_FS_UMOUNT 0xFF00000F -#define PVFS2_VFS_OP_GETXATTR 0xFF000010 -#define PVFS2_VFS_OP_SETXATTR 0xFF000011 -#define PVFS2_VFS_OP_LISTXATTR 0xFF000012 -#define PVFS2_VFS_OP_REMOVEXATTR 0xFF000013 -#define PVFS2_VFS_OP_PARAM 0xFF000014 -#define PVFS2_VFS_OP_PERF_COUNT 0xFF000015 -#define PVFS2_VFS_OP_CANCEL 0xFF00EE00 -#define PVFS2_VFS_OP_FSYNC 0xFF00EE01 -#define PVFS2_VFS_OP_FSKEY 0xFF00EE02 -#define PVFS2_VFS_OP_READDIRPLUS 0xFF00EE03 +#define ORANGEFS_VFS_OP_INVALID 0xFF000000 +#define ORANGEFS_VFS_OP_FILE_IO 0xFF000001 +#define ORANGEFS_VFS_OP_LOOKUP 0xFF000002 +#define ORANGEFS_VFS_OP_CREATE 0xFF000003 +#define ORANGEFS_VFS_OP_GETATTR 0xFF000004 +#define ORANGEFS_VFS_OP_REMOVE 0xFF000005 +#define ORANGEFS_VFS_OP_MKDIR 0xFF000006 +#define ORANGEFS_VFS_OP_READDIR 0xFF000007 +#define ORANGEFS_VFS_OP_SETATTR 0xFF000008 +#define ORANGEFS_VFS_OP_SYMLINK 0xFF000009 +#define ORANGEFS_VFS_OP_RENAME 0xFF00000A +#define ORANGEFS_VFS_OP_STATFS 0xFF00000B +#define ORANGEFS_VFS_OP_TRUNCATE 0xFF00000C +#define ORANGEFS_VFS_OP_MMAP_RA_FLUSH 0xFF00000D 
+#define ORANGEFS_VFS_OP_FS_MOUNT 0xFF00000E +#define ORANGEFS_VFS_OP_FS_UMOUNT 0xFF00000F +#define ORANGEFS_VFS_OP_GETXATTR 0xFF000010 +#define ORANGEFS_VFS_OP_SETXATTR 0xFF000011 +#define ORANGEFS_VFS_OP_LISTXATTR 0xFF000012 +#define ORANGEFS_VFS_OP_REMOVEXATTR 0xFF000013 +#define ORANGEFS_VFS_OP_PARAM 0xFF000014 +#define ORANGEFS_VFS_OP_PERF_COUNT 0xFF000015 +#define ORANGEFS_VFS_OP_CANCEL 0xFF00EE00 +#define ORANGEFS_VFS_OP_FSYNC 0xFF00EE01 +#define ORANGEFS_VFS_OP_FSKEY 0xFF00EE02 +#define ORANGEFS_VFS_OP_READDIRPLUS 0xFF00EE03 /* * Misc constants. Please retain them as multiples of 8! * Otherwise 32-64 bit interactions will be messed up :) */ -#define PVFS2_NAME_LEN 0x00000100 -#define PVFS2_MAX_DEBUG_STRING_LEN 0x00000400 -#define PVFS2_MAX_DEBUG_ARRAY_LEN 0x00000800 +#define ORANGEFS_NAME_LEN 0x00000100 +#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000400 +#define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800 /* - * MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR. - * The value of PVFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 + * MAX_DIRENT_COUNT cannot be larger than ORANGEFS_REQ_LIMIT_LISTATTR. + * The value of ORANGEFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 * to accomodate an attribute object with mirrored handles. * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h index 4295e263e25b..33fcf3bccd2e 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/pvfs2-kernel.h @@ -5,18 +5,18 @@ */ /* - * The PVFS2 Linux kernel support allows PVFS2 volumes to be mounted and + * The ORANGEFS Linux kernel support allows ORANGEFS volumes to be mounted and * accessed through the Linux VFS (i.e. using standard I/O system calls). * This support is only needed on clients that wish to mount the file system. * */ /* - * Declarations and macros for the PVFS2 Linux kernel support. 
+ * Declarations and macros for the ORANGEFS Linux kernel support. */ -#ifndef __PVFS2KERNEL_H -#define __PVFS2KERNEL_H +#ifndef __ORANGEFSKERNEL_H +#define __ORANGEFSKERNEL_H #include #include @@ -55,30 +55,30 @@ #include "pvfs2-dev-proto.h" -#ifdef PVFS2_KERNEL_DEBUG -#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 10 +#ifdef ORANGEFS_KERNEL_DEBUG +#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 10 #else -#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 20 +#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 20 #endif -#define PVFS2_BUFMAP_WAIT_TIMEOUT_SECS 30 +#define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS 30 -#define PVFS2_DEFAULT_SLOT_TIMEOUT_SECS 900 /* 15 minutes */ +#define ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS 900 /* 15 minutes */ -#define PVFS2_REQDEVICE_NAME "pvfs2-req" +#define ORANGEFS_REQDEVICE_NAME "pvfs2-req" -#define PVFS2_DEVREQ_MAGIC 0x20030529 -#define PVFS2_LINK_MAX 0x000000FF -#define PVFS2_PURGE_RETRY_COUNT 0x00000005 -#define PVFS2_SEEK_END 0x00000002 -#define PVFS2_MAX_NUM_OPTIONS 0x00000004 -#define PVFS2_MAX_MOUNT_OPT_LEN 0x00000080 -#define PVFS2_MAX_FSKEY_LEN 64 +#define ORANGEFS_DEVREQ_MAGIC 0x20030529 +#define ORANGEFS_LINK_MAX 0x000000FF +#define ORANGEFS_PURGE_RETRY_COUNT 0x00000005 +#define ORANGEFS_SEEK_END 0x00000002 +#define ORANGEFS_MAX_NUM_OPTIONS 0x00000004 +#define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080 +#define ORANGEFS_MAX_FSKEY_LEN 64 #define MAX_DEV_REQ_UPSIZE (2*sizeof(__s32) + \ -sizeof(__u64) + sizeof(struct pvfs2_upcall_s)) +sizeof(__u64) + sizeof(struct orangefs_upcall_s)) #define MAX_DEV_REQ_DOWNSIZE (2*sizeof(__s32) + \ -sizeof(__u64) + sizeof(struct pvfs2_downcall_s)) +sizeof(__u64) + sizeof(struct orangefs_downcall_s)) #define BITS_PER_LONG_DIV_8 (BITS_PER_LONG >> 3) @@ -104,7 +104,7 @@ sizeof(__u64) + sizeof(struct pvfs2_downcall_s)) MAX_DEV_REQ_DOWNSIZE)) /* - * valid pvfs2 kernel operation states + * valid orangefs kernel operation states * * unknown - op was just initialized * waiting - op is on request_list (upward bound) @@ -113,7 +113,7 @@ 
sizeof(__u64) + sizeof(struct pvfs2_downcall_s)) * purged - op has to start a timer since client-core * exited uncleanly before servicing op */ -enum pvfs2_vfs_op_states { +enum orangefs_vfs_op_states { OP_VFS_STATE_UNKNOWN = 0, OP_VFS_STATE_WAITING = 1, OP_VFS_STATE_INPROGR = 2, @@ -156,9 +156,9 @@ enum pvfs2_vfs_op_states { /* * Defines for controlling whether I/O upcalls are for async or sync operations */ -enum PVFS_async_io_type { - PVFS_VFS_SYNC_IO = 0, - PVFS_VFS_ASYNC_IO = 1, +enum ORANGEFS_async_io_type { + ORANGEFS_VFS_SYNC_IO = 0, + ORANGEFS_VFS_ASYNC_IO = 1, }; /* @@ -172,24 +172,24 @@ struct client_debug_mask { }; /* - * pvfs2 kernel memory related flags + * orangefs kernel memory related flags */ -#if ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) -#define PVFS2_CACHE_CREATE_FLAGS SLAB_RED_ZONE +#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) +#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE #else -#define PVFS2_CACHE_CREATE_FLAGS 0 -#endif /* ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ +#define ORANGEFS_CACHE_CREATE_FLAGS 0 +#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ -#define PVFS2_CACHE_ALLOC_FLAGS (GFP_KERNEL) -#define PVFS2_GFP_FLAGS (GFP_KERNEL) -#define PVFS2_BUFMAP_GFP_FLAGS (GFP_KERNEL) +#define ORANGEFS_CACHE_ALLOC_FLAGS (GFP_KERNEL) +#define ORANGEFS_GFP_FLAGS (GFP_KERNEL) +#define ORANGEFS_BUFMAP_GFP_FLAGS (GFP_KERNEL) -/* pvfs2 xattr and acl related defines */ -#define PVFS2_XATTR_INDEX_POSIX_ACL_ACCESS 1 -#define PVFS2_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -#define PVFS2_XATTR_INDEX_TRUSTED 3 -#define PVFS2_XATTR_INDEX_DEFAULT 4 +/* orangefs xattr and acl related defines */ +#define ORANGEFS_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define ORANGEFS_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define ORANGEFS_XATTR_INDEX_TRUSTED 3 +#define ORANGEFS_XATTR_INDEX_DEFAULT 4 #if 0 #ifndef POSIX_ACL_XATTR_ACCESS @@ -200,17 +200,17 @@ struct client_debug_mask { #endif #endif 
-#define PVFS2_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS -#define PVFS2_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT -#define PVFS2_XATTR_NAME_TRUSTED_PREFIX "trusted." -#define PVFS2_XATTR_NAME_DEFAULT_PREFIX "" +#define ORANGEFS_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS +#define ORANGEFS_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT +#define ORANGEFS_XATTR_NAME_TRUSTED_PREFIX "trusted." +#define ORANGEFS_XATTR_NAME_DEFAULT_PREFIX "" -/* these functions are defined in pvfs2-utils.c */ +/* these functions are defined in orangefs-utils.c */ int orangefs_prepare_cdm_array(char *debug_array_string); int orangefs_prepare_debugfs_help_string(int); -/* defined in pvfs2-debugfs.c */ -int pvfs2_client_debug_init(void); +/* defined in orangefs-debugfs.c */ +int orangefs_client_debug_init(void); void debug_string_to_mask(char *, void *, int); void do_c_mask(int, char *, struct client_debug_mask **); @@ -222,17 +222,17 @@ void do_c_string(void *, int); int check_amalgam_keyword(void *, int); int keyword_is_amalgam(char *); -/*these variables are defined in pvfs2-mod.c */ -extern char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; -extern char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; -extern char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; +/*these variables are defined in orangefs-mod.c */ +extern char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; +extern char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; +extern char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; extern unsigned int kernel_mask_set_mod_init; -extern int pvfs2_init_acl(struct inode *inode, struct inode *dir); -extern const struct xattr_handler *pvfs2_xattr_handlers[]; +extern int orangefs_init_acl(struct inode *inode, struct inode *dir); +extern const struct xattr_handler *orangefs_xattr_handlers[]; -extern struct posix_acl *pvfs2_get_acl(struct inode *inode, int type); -extern int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern 
struct posix_acl *orangefs_get_acl(struct inode *inode, int type); +extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type); /* * Redefine xtvec structure so that we could move helper functions out of @@ -244,10 +244,10 @@ struct xtvec { }; /* - * pvfs2 data structures + * orangefs data structures */ -struct pvfs2_kernel_op_s { - enum pvfs2_vfs_op_states op_state; +struct orangefs_kernel_op_s { + enum orangefs_vfs_op_states op_state; __u64 tag; /* @@ -257,8 +257,8 @@ struct pvfs2_kernel_op_s { */ int uses_shared_memory; - struct pvfs2_upcall_s upcall; - struct pvfs2_downcall_s downcall; + struct orangefs_upcall_s upcall; + struct orangefs_downcall_s downcall; wait_queue_head_t waitq; spinlock_t lock; @@ -268,7 +268,7 @@ struct pvfs2_kernel_op_s { /* VFS aio fields */ - /* used by the async I/O code to stash the pvfs2_kiocb_s structure */ + /* used by the async I/O code to stash the orangefs_kiocb_s structure */ void *priv; /* used again for the async I/O code for deallocation */ @@ -279,14 +279,14 @@ struct pvfs2_kernel_op_s { struct list_head list; }; -/* per inode private pvfs2 info */ -struct pvfs2_inode_s { - struct pvfs2_object_kref refn; - char link_target[PVFS_NAME_MAX]; +/* per inode private orangefs info */ +struct orangefs_inode_s { + struct orangefs_object_kref refn; + char link_target[ORANGEFS_NAME_MAX]; __s64 blksize; /* * Reading/Writing Extended attributes need to acquire the appropriate - * reader/writer semaphore on the pvfs2_inode_s structure. + * reader/writer semaphore on the orangefs_inode_s structure. 
*/ struct rw_semaphore xattr_sem; @@ -299,7 +299,7 @@ struct pvfs2_inode_s { */ unsigned long pinode_flags; - /* All allocated pvfs2_inode_s objects are chained to a list */ + /* All allocated orangefs_inode_s objects are chained to a list */ struct list_head list; }; @@ -324,15 +324,15 @@ struct pvfs2_inode_s { #define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags) #define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags) -/* per superblock private pvfs2 info */ -struct pvfs2_sb_info_s { - struct pvfs2_khandle root_khandle; +/* per superblock private orangefs info */ +struct orangefs_sb_info_s { + struct orangefs_khandle root_khandle; __s32 fs_id; int id; int flags; -#define PVFS2_OPT_INTR 0x01 -#define PVFS2_OPT_LOCAL_LOCK 0x02 - char devname[PVFS_MAX_SERVER_ADDR_LEN]; +#define ORANGEFS_OPT_INTR 0x01 +#define ORANGEFS_OPT_LOCAL_LOCK 0x02 + char devname[ORANGEFS_MAX_SERVER_ADDR_LEN]; struct super_block *sb; int mount_pending; struct list_head list; @@ -344,7 +344,7 @@ struct pvfs2_sb_info_s { * or even completion notification so that the VFS client-side daemon * can free up its vfs_request slots. 
*/ -struct pvfs2_kiocb_s { +struct orangefs_kiocb_s { /* the pointer to the task that initiated the AIO */ struct task_struct *tsk; @@ -352,11 +352,11 @@ struct pvfs2_kiocb_s { struct kiocb *kiocb; /* buffer index that was used for the I/O */ - struct pvfs2_bufmap *bufmap; + struct orangefs_bufmap *bufmap; int buffer_index; - /* pvfs2 kernel operation type */ - struct pvfs2_kernel_op_s *op; + /* orangefs kernel operation type */ + struct orangefs_kernel_op_s *op; /* The user space buffers from/to which I/O is being staged */ struct iovec *iov; @@ -377,31 +377,31 @@ struct pvfs2_kiocb_s { int needs_cleanup; }; -struct pvfs2_stats { +struct orangefs_stats { unsigned long cache_hits; unsigned long cache_misses; unsigned long reads; unsigned long writes; }; -extern struct pvfs2_stats g_pvfs2_stats; +extern struct orangefs_stats g_orangefs_stats; /* * NOTE: See Documentation/filesystems/porting for information * on implementing FOO_I and properly accessing fs private data */ -static inline struct pvfs2_inode_s *PVFS2_I(struct inode *inode) +static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode) { - return container_of(inode, struct pvfs2_inode_s, vfs_inode); + return container_of(inode, struct orangefs_inode_s, vfs_inode); } -static inline struct pvfs2_sb_info_s *PVFS2_SB(struct super_block *sb) +static inline struct orangefs_sb_info_s *ORANGEFS_SB(struct super_block *sb) { - return (struct pvfs2_sb_info_s *) sb->s_fs_info; + return (struct orangefs_sb_info_s *) sb->s_fs_info; } /* ino_t descends from "unsigned long", 8 bytes, 64 bits. 
*/ -static inline ino_t pvfs2_khandle_to_ino(struct pvfs2_khandle *khandle) +static inline ino_t orangefs_khandle_to_ino(struct orangefs_khandle *khandle) { union { unsigned char u[8]; @@ -420,23 +420,23 @@ static inline ino_t pvfs2_khandle_to_ino(struct pvfs2_khandle *khandle) return ihandle.ino; } -static inline struct pvfs2_khandle *get_khandle_from_ino(struct inode *inode) +static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode) { - return &(PVFS2_I(inode)->refn.khandle); + return &(ORANGEFS_I(inode)->refn.khandle); } static inline __s32 get_fsid_from_ino(struct inode *inode) { - return PVFS2_I(inode)->refn.fs_id; + return ORANGEFS_I(inode)->refn.fs_id; } static inline ino_t get_ino_from_khandle(struct inode *inode) { - struct pvfs2_khandle *khandle; + struct orangefs_khandle *khandle; ino_t ino; khandle = get_khandle_from_ino(inode); - ino = pvfs2_khandle_to_ino(khandle); + ino = orangefs_khandle_to_ino(khandle); return ino; } @@ -450,17 +450,17 @@ static inline int is_root_handle(struct inode *inode) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: root handle: %pU, this handle: %pU:\n", __func__, - &PVFS2_SB(inode->i_sb)->root_khandle, + &ORANGEFS_SB(inode->i_sb)->root_khandle, get_khandle_from_ino(inode)); - if (PVFS_khandle_cmp(&(PVFS2_SB(inode->i_sb)->root_khandle), + if (ORANGEFS_khandle_cmp(&(ORANGEFS_SB(inode->i_sb)->root_khandle), get_khandle_from_ino(inode))) return 0; else return 1; } -static inline int match_handle(struct pvfs2_khandle resp_handle, +static inline int match_handle(struct orangefs_khandle resp_handle, struct inode *inode) { gossip_debug(GOSSIP_DCACHE_DEBUG, @@ -469,57 +469,57 @@ static inline int match_handle(struct pvfs2_khandle resp_handle, &resp_handle, get_khandle_from_ino(inode)); - if (PVFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode))) + if (ORANGEFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode))) return 0; else return 1; } /* - * defined in pvfs2-cache.c + * defined in orangefs-cache.c */ int 
op_cache_initialize(void); int op_cache_finalize(void); -struct pvfs2_kernel_op_s *op_alloc(__s32 type); -char *get_opname_string(struct pvfs2_kernel_op_s *new_op); -void op_release(struct pvfs2_kernel_op_s *op); +struct orangefs_kernel_op_s *op_alloc(__s32 type); +char *get_opname_string(struct orangefs_kernel_op_s *new_op); +void op_release(struct orangefs_kernel_op_s *op); int dev_req_cache_initialize(void); int dev_req_cache_finalize(void); void *dev_req_alloc(void); void dev_req_release(void *); -int pvfs2_inode_cache_initialize(void); -int pvfs2_inode_cache_finalize(void); +int orangefs_inode_cache_initialize(void); +int orangefs_inode_cache_finalize(void); int kiocb_cache_initialize(void); int kiocb_cache_finalize(void); -struct pvfs2_kiocb_s *kiocb_alloc(void); -void kiocb_release(struct pvfs2_kiocb_s *ptr); +struct orangefs_kiocb_s *kiocb_alloc(void); +void kiocb_release(struct orangefs_kiocb_s *ptr); /* - * defined in pvfs2-mod.c + * defined in orangefs-mod.c */ void purge_inprogress_ops(void); /* * defined in waitqueue.c */ -int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op); -int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op); -void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op); +int wait_for_matching_downcall(struct orangefs_kernel_op_s *op); +int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op); +void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op); void purge_waiting_ops(void); /* * defined in super.c */ -struct dentry *pvfs2_mount(struct file_system_type *fst, +struct dentry *orangefs_mount(struct file_system_type *fst, int flags, const char *devname, void *data); -void pvfs2_kill_sb(struct super_block *sb); -int pvfs2_remount(struct super_block *sb); +void orangefs_kill_sb(struct super_block *sb); +int orangefs_remount(struct super_block *sb); int fsid_key_table_initialize(void); void fsid_key_table_finalize(void); @@ -527,175 +527,175 @@ void 
fsid_key_table_finalize(void); /* * defined in inode.c */ -__u32 convert_to_pvfs2_mask(unsigned long lite_mask); -struct inode *pvfs2_new_inode(struct super_block *sb, +__u32 convert_to_orangefs_mask(unsigned long lite_mask); +struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, int mode, dev_t dev, - struct pvfs2_object_kref *ref); + struct orangefs_object_kref *ref); -int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr); +int orangefs_setattr(struct dentry *dentry, struct iattr *iattr); -int pvfs2_getattr(struct vfsmount *mnt, +int orangefs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat); /* * defined in xattr.c */ -int pvfs2_setxattr(struct dentry *dentry, +int orangefs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t pvfs2_getxattr(struct dentry *dentry, +ssize_t orangefs_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); -ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); +ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* * defined in namei.c */ -struct inode *pvfs2_iget(struct super_block *sb, - struct pvfs2_object_kref *ref); +struct inode *orangefs_iget(struct super_block *sb, + struct orangefs_object_kref *ref); -ssize_t pvfs2_inode_read(struct inode *inode, - struct iov_iter *iter, - loff_t *offset, - loff_t readahead_size); +ssize_t orangefs_inode_read(struct inode *inode, + struct iov_iter *iter, + loff_t *offset, + loff_t readahead_size); /* - * defined in devpvfs2-req.c + * defined in devorangefs-req.c */ -int pvfs2_dev_init(void); -void pvfs2_dev_cleanup(void); +int orangefs_dev_init(void); +void orangefs_dev_cleanup(void); int is_daemon_in_service(void); int fs_mount_pending(__s32 fsid); /* - * defined in pvfs2-utils.c + * defined in orangefs-utils.c */ -__s32 fsid_of_op(struct pvfs2_kernel_op_s *op); +__s32 fsid_of_op(struct orangefs_kernel_op_s *op); -int 
pvfs2_flush_inode(struct inode *inode); +int orangefs_flush_inode(struct inode *inode); -ssize_t pvfs2_inode_getxattr(struct inode *inode, +ssize_t orangefs_inode_getxattr(struct inode *inode, const char *prefix, const char *name, void *buffer, size_t size); -int pvfs2_inode_setxattr(struct inode *inode, +int orangefs_inode_setxattr(struct inode *inode, const char *prefix, const char *name, const void *value, size_t size, int flags); -int pvfs2_inode_getattr(struct inode *inode, __u32 mask); +int orangefs_inode_getattr(struct inode *inode, __u32 mask); -int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr); +int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); -void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op); +void orangefs_op_initialize(struct orangefs_kernel_op_s *op); -void pvfs2_make_bad_inode(struct inode *inode); +void orangefs_make_bad_inode(struct inode *inode); void block_signals(sigset_t *); void set_signals(sigset_t *); -int pvfs2_unmount_sb(struct super_block *sb); +int orangefs_unmount_sb(struct super_block *sb); -int pvfs2_cancel_op_in_progress(__u64 tag); +int orangefs_cancel_op_in_progress(__u64 tag); -static inline __u64 pvfs2_convert_time_field(const struct timespec *ts) +static inline __u64 orangefs_convert_time_field(const struct timespec *ts) { return (__u64)ts->tv_sec; } -int pvfs2_normalize_to_errno(__s32 error_code); +int orangefs_normalize_to_errno(__s32 error_code); extern struct mutex devreq_mutex; extern struct mutex request_mutex; extern int debug; extern int op_timeout_secs; extern int slot_timeout_secs; -extern struct list_head pvfs2_superblocks; -extern spinlock_t pvfs2_superblocks_lock; -extern struct list_head pvfs2_request_list; -extern spinlock_t pvfs2_request_list_lock; -extern wait_queue_head_t pvfs2_request_list_waitq; +extern struct list_head orangefs_superblocks; +extern spinlock_t orangefs_superblocks_lock; +extern struct list_head orangefs_request_list; +extern spinlock_t 
orangefs_request_list_lock; +extern wait_queue_head_t orangefs_request_list_waitq; extern struct list_head *htable_ops_in_progress; extern spinlock_t htable_ops_in_progress_lock; extern int hash_table_size; -extern const struct address_space_operations pvfs2_address_operations; -extern struct backing_dev_info pvfs2_backing_dev_info; -extern struct inode_operations pvfs2_file_inode_operations; -extern const struct file_operations pvfs2_file_operations; -extern struct inode_operations pvfs2_symlink_inode_operations; -extern struct inode_operations pvfs2_dir_inode_operations; -extern const struct file_operations pvfs2_dir_operations; -extern const struct dentry_operations pvfs2_dentry_operations; -extern const struct file_operations pvfs2_devreq_file_operations; +extern const struct address_space_operations orangefs_address_operations; +extern struct backing_dev_info orangefs_backing_dev_info; +extern struct inode_operations orangefs_file_inode_operations; +extern const struct file_operations orangefs_file_operations; +extern struct inode_operations orangefs_symlink_inode_operations; +extern struct inode_operations orangefs_dir_inode_operations; +extern const struct file_operations orangefs_dir_operations; +extern const struct dentry_operations orangefs_dentry_operations; +extern const struct file_operations orangefs_devreq_file_operations; -extern wait_queue_head_t pvfs2_bufmap_init_waitq; +extern wait_queue_head_t orangefs_bufmap_init_waitq; /* * misc convenience macros */ #define add_op_to_request_list(op) \ do { \ - spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&orangefs_request_list_lock); \ spin_lock(&op->lock); \ set_op_state_waiting(op); \ - list_add_tail(&op->list, &pvfs2_request_list); \ - spin_unlock(&pvfs2_request_list_lock); \ + list_add_tail(&op->list, &orangefs_request_list); \ + spin_unlock(&orangefs_request_list_lock); \ spin_unlock(&op->lock); \ - wake_up_interruptible(&pvfs2_request_list_waitq); \ + 
wake_up_interruptible(&orangefs_request_list_waitq); \ } while (0) #define add_priority_op_to_request_list(op) \ do { \ - spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&orangefs_request_list_lock); \ spin_lock(&op->lock); \ set_op_state_waiting(op); \ \ - list_add(&op->list, &pvfs2_request_list); \ - spin_unlock(&pvfs2_request_list_lock); \ + list_add(&op->list, &orangefs_request_list); \ + spin_unlock(&orangefs_request_list_lock); \ spin_unlock(&op->lock); \ - wake_up_interruptible(&pvfs2_request_list_waitq); \ + wake_up_interruptible(&orangefs_request_list_waitq); \ } while (0) #define remove_op_from_request_list(op) \ do { \ struct list_head *tmp = NULL; \ struct list_head *tmp_safe = NULL; \ - struct pvfs2_kernel_op_s *tmp_op = NULL; \ + struct orangefs_kernel_op_s *tmp_op = NULL; \ \ - spin_lock(&pvfs2_request_list_lock); \ - list_for_each_safe(tmp, tmp_safe, &pvfs2_request_list) { \ + spin_lock(&orangefs_request_list_lock); \ + list_for_each_safe(tmp, tmp_safe, &orangefs_request_list) { \ tmp_op = list_entry(tmp, \ - struct pvfs2_kernel_op_s, \ + struct orangefs_kernel_op_s, \ list); \ if (tmp_op && (tmp_op == op)) { \ list_del(&tmp_op->list); \ break; \ } \ } \ - spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&orangefs_request_list_lock); \ } while (0) -#define PVFS2_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ -#define PVFS2_OP_PRIORITY 2 /* service_operation() is high priority */ -#define PVFS2_OP_CANCELLATION 4 /* this is a cancellation */ -#define PVFS2_OP_NO_SEMAPHORE 8 /* don't acquire semaphore */ -#define PVFS2_OP_ASYNC 16 /* Queue it, but don't wait */ +#define ORANGEFS_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ +#define ORANGEFS_OP_PRIORITY 2 /* service_operation() is high priority */ +#define ORANGEFS_OP_CANCELLATION 4 /* this is a cancellation */ +#define ORANGEFS_OP_NO_SEMAPHORE 8 /* don't acquire semaphore */ +#define ORANGEFS_OP_ASYNC 16 /* Queue it, but don't wait */ -int 
service_operation(struct pvfs2_kernel_op_s *op, +int service_operation(struct orangefs_kernel_op_s *op, const char *op_name, int flags); @@ -719,7 +719,7 @@ int service_operation(struct pvfs2_kernel_op_s *op, * sent and have handle_error * take care of this situation as well.. * - * if a pvfs2 sysint level error occured and i/o has been completed, + * if a orangefs sysint level error occured and i/o has been completed, * there is no need to cancel the operation, as the user has finished * using the bufmap page and so there is no danger in this case. in * this case, we wake up the device normally so that it may free the @@ -731,77 +731,77 @@ int service_operation(struct pvfs2_kernel_op_s *op, #define handle_io_error() \ do { \ if (!op_state_serviced(new_op)) { \ - pvfs2_cancel_op_in_progress(new_op->tag); \ + orangefs_cancel_op_in_progress(new_op->tag); \ op_release(new_op); \ } else { \ wake_up_daemon_for_return(new_op); \ } \ new_op = NULL; \ - pvfs_bufmap_put(bufmap, buffer_index); \ + orangefs_bufmap_put(bufmap, buffer_index); \ buffer_index = -1; \ } while (0) #define get_interruptible_flag(inode) \ - ((PVFS2_SB(inode->i_sb)->flags & PVFS2_OPT_INTR) ? \ - PVFS2_OP_INTERRUPTIBLE : 0) + ((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? 
\ + ORANGEFS_OP_INTERRUPTIBLE : 0) -#define add_pvfs2_sb(sb) \ +#define add_orangefs_sb(sb) \ do { \ gossip_debug(GOSSIP_SUPER_DEBUG, \ - "Adding SB %p to pvfs2 superblocks\n", \ - PVFS2_SB(sb)); \ - spin_lock(&pvfs2_superblocks_lock); \ - list_add_tail(&PVFS2_SB(sb)->list, &pvfs2_superblocks); \ - spin_unlock(&pvfs2_superblocks_lock); \ + "Adding SB %p to orangefs superblocks\n", \ + ORANGEFS_SB(sb)); \ + spin_lock(&orangefs_superblocks_lock); \ + list_add_tail(&ORANGEFS_SB(sb)->list, &orangefs_superblocks); \ + spin_unlock(&orangefs_superblocks_lock); \ } while (0) -#define remove_pvfs2_sb(sb) \ +#define remove_orangefs_sb(sb) \ do { \ struct list_head *tmp = NULL; \ struct list_head *tmp_safe = NULL; \ - struct pvfs2_sb_info_s *pvfs2_sb = NULL; \ + struct orangefs_sb_info_s *orangefs_sb = NULL; \ \ - spin_lock(&pvfs2_superblocks_lock); \ - list_for_each_safe(tmp, tmp_safe, &pvfs2_superblocks) { \ - pvfs2_sb = list_entry(tmp, \ - struct pvfs2_sb_info_s, \ + spin_lock(&orangefs_superblocks_lock); \ + list_for_each_safe(tmp, tmp_safe, &orangefs_superblocks) { \ + orangefs_sb = list_entry(tmp, \ + struct orangefs_sb_info_s, \ list); \ - if (pvfs2_sb && (pvfs2_sb->sb == sb)) { \ + if (orangefs_sb && (orangefs_sb->sb == sb)) { \ gossip_debug(GOSSIP_SUPER_DEBUG, \ - "Removing SB %p from pvfs2 superblocks\n", \ - pvfs2_sb); \ - list_del(&pvfs2_sb->list); \ + "Removing SB %p from orangefs superblocks\n", \ + orangefs_sb); \ + list_del(&orangefs_sb->list); \ break; \ } \ } \ - spin_unlock(&pvfs2_superblocks_lock); \ + spin_unlock(&orangefs_superblocks_lock); \ } while (0) -#define pvfs2_lock_inode(inode) spin_lock(&inode->i_lock) -#define pvfs2_unlock_inode(inode) spin_unlock(&inode->i_lock) +#define orangefs_lock_inode(inode) spin_lock(&inode->i_lock) +#define orangefs_unlock_inode(inode) spin_unlock(&inode->i_lock) #define fill_default_sys_attrs(sys_attr, type, mode) \ do { \ sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \ sys_attr.group = 
from_kgid(current_user_ns(), current_fsgid()); \ sys_attr.size = 0; \ - sys_attr.perms = PVFS_util_translate_mode(mode); \ + sys_attr.perms = ORANGEFS_util_translate_mode(mode); \ sys_attr.objtype = type; \ - sys_attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; \ + sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) -#define pvfs2_inode_lock(__i) mutex_lock(&(__i)->i_mutex) +#define orangefs_inode_lock(__i) mutex_lock(&(__i)->i_mutex) -#define pvfs2_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex) +#define orangefs_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex) -static inline void pvfs2_i_size_write(struct inode *inode, loff_t i_size) +static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - pvfs2_inode_lock(inode); + orangefs_inode_lock(inode); #endif i_size_write(inode, i_size); #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - pvfs2_inode_unlock(inode); + orangefs_inode_unlock(inode); #endif } @@ -816,4 +816,4 @@ static inline unsigned int diff(struct timeval *end, struct timeval *begin) return (end->tv_sec * 1000000) + end->tv_usec; } -#endif /* __PVFS2KERNEL_H */ +#endif /* __ORANGEFSKERNEL_H */ diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/pvfs2-mod.c index d848c90413d1..d8642908a917 100644 --- a/fs/orangefs/pvfs2-mod.c +++ b/fs/orangefs/pvfs2-mod.c @@ -12,9 +12,9 @@ #include "pvfs2-debugfs.h" #include "pvfs2-sysfs.h" -/* PVFS2_VERSION is a ./configure define */ -#ifndef PVFS2_VERSION -#define PVFS2_VERSION "Unknown" +/* ORANGEFS_VERSION is a ./configure define */ +#ifndef ORANGEFS_VERSION +#define ORANGEFS_VERSION "Unknown" #endif /* @@ -25,9 +25,9 @@ struct client_debug_mask *cdm_array; int cdm_element_count; -char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none"; -char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; -char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; +char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN] = "none"; +char 
client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; +char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; char *debug_help_string; int help_string_initialized; @@ -36,7 +36,7 @@ struct dentry *client_debug_dentry; struct dentry *debug_dir; int client_verbose_index; int client_all_index; -struct pvfs2_stats g_pvfs2_stats; +struct orangefs_stats g_orangefs_stats; /* the size of the hash tables for ops in progress */ int hash_table_size = 509; @@ -45,22 +45,22 @@ static ulong module_parm_debug_mask; __u64 gossip_debug_mask; struct client_debug_mask client_debug_mask = { NULL, 0, 0 }; unsigned int kernel_mask_set_mod_init; /* implicitly false */ -int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS; -int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS; +int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS; +int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS; MODULE_LICENSE("GPL"); -MODULE_AUTHOR("PVFS2 Development Team"); -MODULE_DESCRIPTION("The Linux Kernel VFS interface to PVFS2"); -MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see pvfs2-debug.h for values)"); +MODULE_AUTHOR("ORANGEFS Development Team"); +MODULE_DESCRIPTION("The Linux Kernel VFS interface to ORANGEFS"); +MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see orangefs-debug.h for values)"); MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds"); MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds"); MODULE_PARM_DESC(hash_table_size, "size of hash table for operations in progress"); -static struct file_system_type pvfs2_fs_type = { +static struct file_system_type orangefs_fs_type = { .name = "pvfs2", - .mount = pvfs2_mount, - .kill_sb = pvfs2_kill_sb, + .mount = orangefs_mount, + .kill_sb = orangefs_kill_sb, .owner = THIS_MODULE, }; @@ -85,15 +85,15 @@ struct list_head *htable_ops_in_progress; DEFINE_SPINLOCK(htable_ops_in_progress_lock); /* list for queueing upcall operations */ -LIST_HEAD(pvfs2_request_list); 
+LIST_HEAD(orangefs_request_list); -/* used to protect the above pvfs2_request_list */ -DEFINE_SPINLOCK(pvfs2_request_list_lock); +/* used to protect the above orangefs_request_list */ +DEFINE_SPINLOCK(orangefs_request_list_lock); /* used for incoming request notification */ -DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq); +DECLARE_WAIT_QUEUE_HEAD(orangefs_request_list_waitq); -static int __init pvfs2_init(void) +static int __init orangefs_init(void) { int ret = -1; __u32 i = 0; @@ -112,7 +112,7 @@ static int __init pvfs2_init(void) /* * if the mask has a non-zero value, then indicate that the mask - * was set when the kernel module was loaded. The pvfs2 dev ioctl + * was set when the kernel module was loaded. The orangefs dev ioctl * command will look at this boolean to determine if the kernel's * debug mask should be overwritten when the client-core is started. */ @@ -120,11 +120,11 @@ static int __init pvfs2_init(void) kernel_mask_set_mod_init = true; /* print information message to the system log */ - pr_info("pvfs2: pvfs2_init called with debug mask: :%s: :%llx:\n", + pr_info("orangefs: orangefs_init called with debug mask: :%s: :%llx:\n", kernel_debug_string, (unsigned long long)gossip_debug_mask); - ret = bdi_init(&pvfs2_backing_dev_info); + ret = bdi_init(&orangefs_backing_dev_info); if (ret) return ret; @@ -144,7 +144,7 @@ static int __init pvfs2_init(void) if (ret < 0) goto cleanup_op; - ret = pvfs2_inode_cache_initialize(); + ret = orangefs_inode_cache_initialize(); if (ret < 0) goto cleanup_req; @@ -153,9 +153,9 @@ static int __init pvfs2_init(void) goto cleanup_inode; /* Initialize the pvfsdev subsystem. 
*/ - ret = pvfs2_dev_init(); + ret = orangefs_dev_init(); if (ret < 0) { - gossip_err("pvfs2: could not initialize device subsystem %d!\n", + gossip_err("orangefs: could not initialize device subsystem %d!\n", ret); goto cleanup_kiocb; } @@ -197,17 +197,17 @@ static int __init pvfs2_init(void) if (ret) goto out; - pvfs2_debugfs_init(); - pvfs2_kernel_debug_init(); + orangefs_debugfs_init(); + orangefs_kernel_debug_init(); orangefs_sysfs_init(); - ret = register_filesystem(&pvfs2_fs_type); + ret = register_filesystem(&orangefs_fs_type); if (ret == 0) { - pr_info("pvfs2: module version %s loaded\n", PVFS2_VERSION); + pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION); return 0; } - pvfs2_debugfs_cleanup(); + orangefs_debugfs_cleanup(); orangefs_sysfs_exit(); fsid_key_table_finalize(); @@ -215,13 +215,13 @@ cleanup_progress_table: kfree(htable_ops_in_progress); cleanup_device: - pvfs2_dev_cleanup(); + orangefs_dev_cleanup(); cleanup_kiocb: kiocb_cache_finalize(); cleanup_inode: - pvfs2_inode_cache_finalize(); + orangefs_inode_cache_finalize(); cleanup_req: dev_req_cache_finalize(); @@ -230,29 +230,29 @@ cleanup_op: op_cache_finalize(); err: - bdi_destroy(&pvfs2_backing_dev_info); + bdi_destroy(&orangefs_backing_dev_info); out: return ret; } -static void __exit pvfs2_exit(void) +static void __exit orangefs_exit(void) { int i = 0; - struct pvfs2_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op = NULL; - gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2: pvfs2_exit called\n"); + gossip_debug(GOSSIP_INIT_DEBUG, "orangefs: orangefs_exit called\n"); - unregister_filesystem(&pvfs2_fs_type); - pvfs2_debugfs_cleanup(); + unregister_filesystem(&orangefs_fs_type); + orangefs_debugfs_cleanup(); orangefs_sysfs_exit(); fsid_key_table_finalize(); - pvfs2_dev_cleanup(); + orangefs_dev_cleanup(); /* clear out all pending upcall op requests */ - spin_lock(&pvfs2_request_list_lock); - while (!list_empty(&pvfs2_request_list)) { - cur_op = 
list_entry(pvfs2_request_list.next, - struct pvfs2_kernel_op_s, + spin_lock(&orangefs_request_list_lock); + while (!list_empty(&orangefs_request_list)) { + cur_op = list_entry(orangefs_request_list.next, + struct orangefs_kernel_op_s, list); list_del(&cur_op->list); gossip_debug(GOSSIP_INIT_DEBUG, @@ -260,26 +260,26 @@ static void __exit pvfs2_exit(void) cur_op->upcall.type); op_release(cur_op); } - spin_unlock(&pvfs2_request_list_lock); + spin_unlock(&orangefs_request_list_lock); for (i = 0; i < hash_table_size; i++) while (!list_empty(&htable_ops_in_progress[i])) { cur_op = list_entry(htable_ops_in_progress[i].next, - struct pvfs2_kernel_op_s, + struct orangefs_kernel_op_s, list); op_release(cur_op); } kiocb_cache_finalize(); - pvfs2_inode_cache_finalize(); + orangefs_inode_cache_finalize(); dev_req_cache_finalize(); op_cache_finalize(); kfree(htable_ops_in_progress); - bdi_destroy(&pvfs2_backing_dev_info); + bdi_destroy(&orangefs_backing_dev_info); - pr_info("pvfs2: module version %s unloaded\n", PVFS2_VERSION); + pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION); } /* @@ -291,8 +291,8 @@ void purge_inprogress_ops(void) int i; for (i = 0; i < hash_table_size; i++) { - struct pvfs2_kernel_op_s *op; - struct pvfs2_kernel_op_s *next; + struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *next; list_for_each_entry_safe(op, next, @@ -311,5 +311,5 @@ void purge_inprogress_ops(void) } } -module_init(pvfs2_init); -module_exit(pvfs2_exit); +module_init(orangefs_init); +module_exit(orangefs_exit); diff --git a/fs/orangefs/pvfs2-sysfs.c b/fs/orangefs/pvfs2-sysfs.c index ea635b5e431b..f04de2593c79 100644 --- a/fs/orangefs/pvfs2-sysfs.c +++ b/fs/orangefs/pvfs2-sysfs.c @@ -669,13 +669,13 @@ static ssize_t sysfs_int_show(char *kobj_id, char *buf, void *attr) rc = scnprintf(buf, PAGE_SIZE, "%lu\n", - g_pvfs2_stats.reads); + g_orangefs_stats.reads); goto out; } else if (!strcmp(stats_orangefs_attr->attr.name, "writes")) { rc = scnprintf(buf, 
PAGE_SIZE, "%lu\n", - g_pvfs2_stats.writes); + g_orangefs_stats.writes); goto out; } else { goto out; @@ -752,7 +752,7 @@ out: */ static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) { - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; int rc = 0; char *ser_op_type = NULL; struct orangefs_attribute *orangefs_attr; @@ -768,9 +768,9 @@ static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) kobj_id); if (strcmp(kobj_id, PC_KOBJ_ID)) - op_alloc_type = PVFS2_VFS_OP_PARAM; + op_alloc_type = ORANGEFS_VFS_OP_PARAM; else - op_alloc_type = PVFS2_VFS_OP_PERF_COUNT; + op_alloc_type = ORANGEFS_VFS_OP_PERF_COUNT; new_op = op_alloc(op_alloc_type); if (!new_op) { @@ -788,113 +788,113 @@ static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) } if (strcmp(kobj_id, PC_KOBJ_ID)) - new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_GET; + new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_GET; if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) { orangefs_attr = (struct orangefs_attribute *)attr; if (!strcmp(orangefs_attr->attr.name, "perf_history_size")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE; + ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE; else if (!strcmp(orangefs_attr->attr.name, "perf_time_interval_secs")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS; + ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS; else if (!strcmp(orangefs_attr->attr.name, "perf_counter_reset")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_PERF_RESET; + ORANGEFS_PARAM_REQUEST_OP_PERF_RESET; } else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) { acache_attr = (struct acache_orangefs_attribute *)attr; if (!strcmp(acache_attr->attr.name, "timeout_msecs")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS; if (!strcmp(acache_attr->attr.name, "hard_limit")) new_op->upcall.req.param.op = - 
PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT; if (!strcmp(acache_attr->attr.name, "soft_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT; if (!strcmp(acache_attr->attr.name, "reclaim_percentage")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE; } else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) { capcache_attr = (struct capcache_orangefs_attribute *)attr; if (!strcmp(capcache_attr->attr.name, "timeout_secs")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS; if (!strcmp(capcache_attr->attr.name, "hard_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT; if (!strcmp(capcache_attr->attr.name, "soft_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT; if (!strcmp(capcache_attr->attr.name, "reclaim_percentage")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE; } else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) { ccache_attr = (struct ccache_orangefs_attribute *)attr; if (!strcmp(ccache_attr->attr.name, "timeout_secs")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS; if (!strcmp(ccache_attr->attr.name, "hard_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT; if (!strcmp(ccache_attr->attr.name, "soft_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT; if (!strcmp(ccache_attr->attr.name, "reclaim_percentage")) 
new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE; } else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) { ncache_attr = (struct ncache_orangefs_attribute *)attr; if (!strcmp(ncache_attr->attr.name, "timeout_msecs")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS; if (!strcmp(ncache_attr->attr.name, "hard_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT; if (!strcmp(ncache_attr->attr.name, "soft_limit")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT; if (!strcmp(ncache_attr->attr.name, "reclaim_percentage")) new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE; } else if (!strcmp(kobj_id, PC_KOBJ_ID)) { pc_attr = (struct pc_orangefs_attribute *)attr; if (!strcmp(pc_attr->attr.name, ACACHE_KOBJ_ID)) new_op->upcall.req.perf_count.type = - PVFS2_PERF_COUNT_REQUEST_ACACHE; + ORANGEFS_PERF_COUNT_REQUEST_ACACHE; if (!strcmp(pc_attr->attr.name, CAPCACHE_KOBJ_ID)) new_op->upcall.req.perf_count.type = - PVFS2_PERF_COUNT_REQUEST_CAPCACHE; + ORANGEFS_PERF_COUNT_REQUEST_CAPCACHE; if (!strcmp(pc_attr->attr.name, NCACHE_KOBJ_ID)) new_op->upcall.req.perf_count.type = - PVFS2_PERF_COUNT_REQUEST_NCACHE; + ORANGEFS_PERF_COUNT_REQUEST_NCACHE; } else { gossip_err("sysfs_service_op_show: unknown kobj_id:%s:\n", @@ -905,15 +905,15 @@ static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) if (strcmp(kobj_id, PC_KOBJ_ID)) - ser_op_type = "pvfs2_param"; + ser_op_type = "orangefs_param"; else - ser_op_type = "pvfs2_perf_count"; + ser_op_type = "orangefs_perf_count"; /* * The service_operation will return an errno return code on * error, and zero on success. 
*/ - rc = service_operation(new_op, ser_op_type, PVFS2_OP_INTERRUPTIBLE); + rc = service_operation(new_op, ser_op_type, ORANGEFS_OP_INTERRUPTIBLE); out: if (!rc) { @@ -1025,7 +1025,7 @@ static ssize_t */ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) { - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; int val = 0; int rc = 0; struct orangefs_attribute *orangefs_attr; @@ -1038,7 +1038,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "sysfs_service_op_store: id:%s:\n", kobj_id); - new_op = op_alloc(PVFS2_VFS_OP_PARAM); + new_op = op_alloc(ORANGEFS_VFS_OP_PARAM); if (!new_op) { rc = -ENOMEM; goto out; @@ -1066,7 +1066,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) if (!strcmp(orangefs_attr->attr.name, "perf_history_size")) { if (val > 0) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE; + ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE; } else { rc = 0; goto out; @@ -1075,7 +1075,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "perf_time_interval_secs")) { if (val > 0) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS; + ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS; } else { rc = 0; goto out; @@ -1084,7 +1084,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "perf_counter_reset")) { if ((val == 0) || (val == 1)) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_PERF_RESET; + ORANGEFS_PARAM_REQUEST_OP_PERF_RESET; } else { rc = 0; goto out; @@ -1097,7 +1097,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) if (!strcmp(acache_attr->attr.name, "hard_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT; } else { rc = 0; goto out; @@ -1105,7 +1105,7 @@ static int 
sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if (!strcmp(acache_attr->attr.name, "soft_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT; } else { rc = 0; goto out; @@ -1114,7 +1114,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "reclaim_percentage")) { if ((val > -1) && (val < 101)) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE; } else { rc = 0; goto out; @@ -1122,7 +1122,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if (!strcmp(acache_attr->attr.name, "timeout_msecs")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS; + ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS; } else { rc = 0; goto out; @@ -1135,7 +1135,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) if (!strcmp(capcache_attr->attr.name, "hard_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT; } else { rc = 0; goto out; @@ -1143,7 +1143,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if (!strcmp(capcache_attr->attr.name, "soft_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT; } else { rc = 0; goto out; @@ -1152,7 +1152,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "reclaim_percentage")) { if ((val > -1) && (val < 101)) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE; } else { rc = 0; goto out; @@ -1160,7 +1160,7 @@ static int sysfs_service_op_store(char *kobj_id, 
const char *buf, void *attr) } else if (!strcmp(capcache_attr->attr.name, "timeout_secs")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS; + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS; } else { rc = 0; goto out; @@ -1173,7 +1173,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) if (!strcmp(ccache_attr->attr.name, "hard_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT; } else { rc = 0; goto out; @@ -1181,7 +1181,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if (!strcmp(ccache_attr->attr.name, "soft_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT; } else { rc = 0; goto out; @@ -1190,7 +1190,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "reclaim_percentage")) { if ((val > -1) && (val < 101)) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE; } else { rc = 0; goto out; @@ -1198,7 +1198,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if (!strcmp(ccache_attr->attr.name, "timeout_secs")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS; + ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS; } else { rc = 0; goto out; @@ -1211,7 +1211,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) if (!strcmp(ncache_attr->attr.name, "hard_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT; } else { rc = 0; goto out; @@ -1219,7 +1219,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if 
(!strcmp(ncache_attr->attr.name, "soft_limit")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT; } else { rc = 0; goto out; @@ -1228,7 +1228,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) "reclaim_percentage")) { if ((val > -1) && (val < 101)) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE; } else { rc = 0; goto out; @@ -1236,7 +1236,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } else if (!strcmp(ncache_attr->attr.name, "timeout_msecs")) { if (val > -1) { new_op->upcall.req.param.op = - PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS; + ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS; } else { rc = 0; goto out; @@ -1250,7 +1250,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) goto out; } - new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET; + new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_SET; new_op->upcall.req.param.value = val; @@ -1258,7 +1258,7 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) * The service_operation will return a errno return code on * error, and zero on success. 
*/ - rc = service_operation(new_op, "pvfs2_param", PVFS2_OP_INTERRUPTIBLE); + rc = service_operation(new_op, "orangefs_param", ORANGEFS_OP_INTERRUPTIBLE); if (rc < 0) { gossip_err("sysfs_service_op_store: service op returned:%d:\n", diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c index 1180a2480d2b..d132c5f712a4 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/pvfs2-utils.c @@ -8,67 +8,67 @@ #include "pvfs2-dev-proto.h" #include "pvfs2-bufmap.h" -__s32 fsid_of_op(struct pvfs2_kernel_op_s *op) +__s32 fsid_of_op(struct orangefs_kernel_op_s *op) { - __s32 fsid = PVFS_FS_ID_NULL; + __s32 fsid = ORANGEFS_FS_ID_NULL; if (op) { switch (op->upcall.type) { - case PVFS2_VFS_OP_FILE_IO: + case ORANGEFS_VFS_OP_FILE_IO: fsid = op->upcall.req.io.refn.fs_id; break; - case PVFS2_VFS_OP_LOOKUP: + case ORANGEFS_VFS_OP_LOOKUP: fsid = op->upcall.req.lookup.parent_refn.fs_id; break; - case PVFS2_VFS_OP_CREATE: + case ORANGEFS_VFS_OP_CREATE: fsid = op->upcall.req.create.parent_refn.fs_id; break; - case PVFS2_VFS_OP_GETATTR: + case ORANGEFS_VFS_OP_GETATTR: fsid = op->upcall.req.getattr.refn.fs_id; break; - case PVFS2_VFS_OP_REMOVE: + case ORANGEFS_VFS_OP_REMOVE: fsid = op->upcall.req.remove.parent_refn.fs_id; break; - case PVFS2_VFS_OP_MKDIR: + case ORANGEFS_VFS_OP_MKDIR: fsid = op->upcall.req.mkdir.parent_refn.fs_id; break; - case PVFS2_VFS_OP_READDIR: + case ORANGEFS_VFS_OP_READDIR: fsid = op->upcall.req.readdir.refn.fs_id; break; - case PVFS2_VFS_OP_SETATTR: + case ORANGEFS_VFS_OP_SETATTR: fsid = op->upcall.req.setattr.refn.fs_id; break; - case PVFS2_VFS_OP_SYMLINK: + case ORANGEFS_VFS_OP_SYMLINK: fsid = op->upcall.req.sym.parent_refn.fs_id; break; - case PVFS2_VFS_OP_RENAME: + case ORANGEFS_VFS_OP_RENAME: fsid = op->upcall.req.rename.old_parent_refn.fs_id; break; - case PVFS2_VFS_OP_STATFS: + case ORANGEFS_VFS_OP_STATFS: fsid = op->upcall.req.statfs.fs_id; break; - case PVFS2_VFS_OP_TRUNCATE: + case ORANGEFS_VFS_OP_TRUNCATE: fsid = 
op->upcall.req.truncate.refn.fs_id; break; - case PVFS2_VFS_OP_MMAP_RA_FLUSH: + case ORANGEFS_VFS_OP_MMAP_RA_FLUSH: fsid = op->upcall.req.ra_cache_flush.refn.fs_id; break; - case PVFS2_VFS_OP_FS_UMOUNT: + case ORANGEFS_VFS_OP_FS_UMOUNT: fsid = op->upcall.req.fs_umount.fs_id; break; - case PVFS2_VFS_OP_GETXATTR: + case ORANGEFS_VFS_OP_GETXATTR: fsid = op->upcall.req.getxattr.refn.fs_id; break; - case PVFS2_VFS_OP_SETXATTR: + case ORANGEFS_VFS_OP_SETXATTR: fsid = op->upcall.req.setxattr.refn.fs_id; break; - case PVFS2_VFS_OP_LISTXATTR: + case ORANGEFS_VFS_OP_LISTXATTR: fsid = op->upcall.req.listxattr.refn.fs_id; break; - case PVFS2_VFS_OP_REMOVEXATTR: + case ORANGEFS_VFS_OP_REMOVEXATTR: fsid = op->upcall.req.removexattr.refn.fs_id; break; - case PVFS2_VFS_OP_FSYNC: + case ORANGEFS_VFS_OP_FSYNC: fsid = op->upcall.req.fsync.refn.fs_id; break; default: @@ -78,20 +78,20 @@ __s32 fsid_of_op(struct pvfs2_kernel_op_s *op) return fsid; } -static void pvfs2_set_inode_flags(struct inode *inode, - struct PVFS_sys_attr_s *attrs) +static void orangefs_set_inode_flags(struct inode *inode, + struct ORANGEFS_sys_attr_s *attrs) { - if (attrs->flags & PVFS_IMMUTABLE_FL) + if (attrs->flags & ORANGEFS_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; - if (attrs->flags & PVFS_APPEND_FL) + if (attrs->flags & ORANGEFS_APPEND_FL) inode->i_flags |= S_APPEND; else inode->i_flags &= ~S_APPEND; - if (attrs->flags & PVFS_NOATIME_FL) + if (attrs->flags & ORANGEFS_NOATIME_FL) inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; @@ -100,12 +100,12 @@ static void pvfs2_set_inode_flags(struct inode *inode, /* NOTE: symname is ignored unless the inode is a sym link */ static int copy_attributes_to_inode(struct inode *inode, - struct PVFS_sys_attr_s *attrs, + struct ORANGEFS_sys_attr_s *attrs, char *symname) { int ret = -1; int perm_mode = 0; - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 
loff_t inode_size = 0; loff_t rounded_up_size = 0; @@ -127,24 +127,24 @@ static int copy_attributes_to_inode(struct inode *inode, gossip_debug(GOSSIP_UTILS_DEBUG, "attrs->mask = %x (objtype = %s)\n", attrs->mask, - attrs->objtype == PVFS_TYPE_METAFILE ? "file" : - attrs->objtype == PVFS_TYPE_DIRECTORY ? "directory" : - attrs->objtype == PVFS_TYPE_SYMLINK ? "symlink" : + attrs->objtype == ORANGEFS_TYPE_METAFILE ? "file" : + attrs->objtype == ORANGEFS_TYPE_DIRECTORY ? "directory" : + attrs->objtype == ORANGEFS_TYPE_SYMLINK ? "symlink" : "invalid/unknown"); switch (attrs->objtype) { - case PVFS_TYPE_METAFILE: - pvfs2_set_inode_flags(inode, attrs); - if (attrs->mask & PVFS_ATTR_SYS_SIZE) { + case ORANGEFS_TYPE_METAFILE: + orangefs_set_inode_flags(inode, attrs); + if (attrs->mask & ORANGEFS_ATTR_SYS_SIZE) { inode_size = (loff_t) attrs->size; rounded_up_size = (inode_size + (4096 - (inode_size % 4096))); - pvfs2_lock_inode(inode); + orangefs_lock_inode(inode); inode->i_bytes = inode_size; inode->i_blocks = (unsigned long)(rounded_up_size / 512); - pvfs2_unlock_inode(inode); + orangefs_unlock_inode(inode); /* * NOTE: make sure all the places we're called @@ -155,7 +155,7 @@ static int copy_attributes_to_inode(struct inode *inode, inode->i_size = inode_size; } break; - case PVFS_TYPE_SYMLINK: + case ORANGEFS_TYPE_SYMLINK: if (symname != NULL) { inode->i_size = (loff_t) strlen(symname); break; @@ -164,9 +164,9 @@ static int copy_attributes_to_inode(struct inode *inode, default: inode->i_size = PAGE_CACHE_SIZE; - pvfs2_lock_inode(inode); + orangefs_lock_inode(inode); inode_set_bytes(inode, inode->i_size); - pvfs2_unlock_inode(inode); + orangefs_unlock_inode(inode); break; } @@ -179,30 +179,30 @@ static int copy_attributes_to_inode(struct inode *inode, inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - if (attrs->perms & PVFS_O_EXECUTE) + if (attrs->perms & ORANGEFS_O_EXECUTE) perm_mode |= S_IXOTH; - if (attrs->perms & PVFS_O_WRITE) + if (attrs->perms & 
ORANGEFS_O_WRITE) perm_mode |= S_IWOTH; - if (attrs->perms & PVFS_O_READ) + if (attrs->perms & ORANGEFS_O_READ) perm_mode |= S_IROTH; - if (attrs->perms & PVFS_G_EXECUTE) + if (attrs->perms & ORANGEFS_G_EXECUTE) perm_mode |= S_IXGRP; - if (attrs->perms & PVFS_G_WRITE) + if (attrs->perms & ORANGEFS_G_WRITE) perm_mode |= S_IWGRP; - if (attrs->perms & PVFS_G_READ) + if (attrs->perms & ORANGEFS_G_READ) perm_mode |= S_IRGRP; - if (attrs->perms & PVFS_U_EXECUTE) + if (attrs->perms & ORANGEFS_U_EXECUTE) perm_mode |= S_IXUSR; - if (attrs->perms & PVFS_U_WRITE) + if (attrs->perms & ORANGEFS_U_WRITE) perm_mode |= S_IWUSR; - if (attrs->perms & PVFS_U_READ) + if (attrs->perms & ORANGEFS_U_READ) perm_mode |= S_IRUSR; - if (attrs->perms & PVFS_G_SGID) + if (attrs->perms & ORANGEFS_G_SGID) perm_mode |= S_ISGID; - if (attrs->perms & PVFS_U_SUID) + if (attrs->perms & ORANGEFS_U_SUID) perm_mode |= S_ISUID; inode->i_mode = perm_mode; @@ -216,11 +216,11 @@ static int copy_attributes_to_inode(struct inode *inode, } switch (attrs->objtype) { - case PVFS_TYPE_METAFILE: + case ORANGEFS_TYPE_METAFILE: inode->i_mode |= S_IFREG; ret = 0; break; - case PVFS_TYPE_DIRECTORY: + case ORANGEFS_TYPE_DIRECTORY: inode->i_mode |= S_IFDIR; /* NOTE: we have no good way to keep nlink consistent * for directories across clients; keep constant at 1. 
@@ -230,17 +230,17 @@ static int copy_attributes_to_inode(struct inode *inode, set_nlink(inode, 1); ret = 0; break; - case PVFS_TYPE_SYMLINK: + case ORANGEFS_TYPE_SYMLINK: inode->i_mode |= S_IFLNK; /* copy link target to inode private data */ - if (pvfs2_inode && symname) { - strncpy(pvfs2_inode->link_target, + if (orangefs_inode && symname) { + strncpy(orangefs_inode->link_target, symname, - PVFS_NAME_MAX); + ORANGEFS_NAME_MAX); gossip_debug(GOSSIP_UTILS_DEBUG, "Copied attr link target %s\n", - pvfs2_inode->link_target); + orangefs_inode->link_target); } gossip_debug(GOSSIP_UTILS_DEBUG, "symlink mode %o\n", @@ -248,12 +248,12 @@ static int copy_attributes_to_inode(struct inode *inode, ret = 0; break; default: - gossip_err("pvfs2: copy_attributes_to_inode: got invalid attribute type %x\n", + gossip_err("orangefs: copy_attributes_to_inode: got invalid attribute type %x\n", attrs->objtype); } gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n", + "orangefs: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n", inode->i_mode, (unsigned long)i_size_read(inode)); @@ -265,7 +265,7 @@ static int copy_attributes_to_inode(struct inode *inode, * anything, so don't bother copying it into the sys_attr object here. 
*/ static inline int copy_attributes_from_inode(struct inode *inode, - struct PVFS_sys_attr_s *attrs, + struct ORANGEFS_sys_attr_s *attrs, struct iattr *iattr) { umode_t tmp_mode; @@ -285,36 +285,36 @@ static inline int copy_attributes_from_inode(struct inode *inode, attrs->mask = 0; if (iattr->ia_valid & ATTR_UID) { attrs->owner = from_kuid(current_user_ns(), iattr->ia_uid); - attrs->mask |= PVFS_ATTR_SYS_UID; + attrs->mask |= ORANGEFS_ATTR_SYS_UID; gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); } if (iattr->ia_valid & ATTR_GID) { attrs->group = from_kgid(current_user_ns(), iattr->ia_gid); - attrs->mask |= PVFS_ATTR_SYS_GID; + attrs->mask |= ORANGEFS_ATTR_SYS_GID; gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); } if (iattr->ia_valid & ATTR_ATIME) { - attrs->mask |= PVFS_ATTR_SYS_ATIME; + attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; if (iattr->ia_valid & ATTR_ATIME_SET) { attrs->atime = - pvfs2_convert_time_field(&iattr->ia_atime); - attrs->mask |= PVFS_ATTR_SYS_ATIME_SET; + orangefs_convert_time_field(&iattr->ia_atime); + attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; } } if (iattr->ia_valid & ATTR_MTIME) { - attrs->mask |= PVFS_ATTR_SYS_MTIME; + attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; if (iattr->ia_valid & ATTR_MTIME_SET) { attrs->mtime = - pvfs2_convert_time_field(&iattr->ia_mtime); - attrs->mask |= PVFS_ATTR_SYS_MTIME_SET; + orangefs_convert_time_field(&iattr->ia_mtime); + attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; } } if (iattr->ia_valid & ATTR_CTIME) - attrs->mask |= PVFS_ATTR_SYS_CTIME; + attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; /* - * PVFS2 cannot set size with a setattr operation. Probably not likely + * ORANGEFS cannot set size with a setattr operation. 
Probably not likely * to be requested through the VFS, but just in case, don't worry about * ATTR_SIZE */ @@ -342,21 +342,21 @@ static inline int copy_attributes_from_inode(struct inode *inode, return -EINVAL; } - attrs->perms = PVFS_util_translate_mode(tmp_mode); - attrs->mask |= PVFS_ATTR_SYS_PERM; + attrs->perms = ORANGEFS_util_translate_mode(tmp_mode); + attrs->mask |= ORANGEFS_ATTR_SYS_PERM; } return 0; } /* - * issues a pvfs2 getattr request and fills in the appropriate inode + * issues a orangefs getattr request and fills in the appropriate inode * attributes if successful. returns 0 on success; -errno otherwise */ -int pvfs2_inode_getattr(struct inode *inode, __u32 getattr_mask) +int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; int ret = -EINVAL; gossip_debug(GOSSIP_UTILS_DEBUG, @@ -364,10 +364,10 @@ int pvfs2_inode_getattr(struct inode *inode, __u32 getattr_mask) __func__, get_khandle_from_ino(inode)); - new_op = op_alloc(PVFS2_VFS_OP_GETATTR); + new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); if (!new_op) return -ENOMEM; - new_op->upcall.req.getattr.refn = pvfs2_inode->refn; + new_op->upcall.req.getattr.refn = orangefs_inode->refn; new_op->upcall.req.getattr.mask = getattr_mask; ret = service_operation(new_op, __func__, @@ -384,17 +384,17 @@ int pvfs2_inode_getattr(struct inode *inode, __u32 getattr_mask) } /* - * Store blksize in pvfs2 specific part of inode structure; we are + * Store blksize in orangefs specific part of inode structure; we are * only going to use this to report to stat to make sure it doesn't * perturb any inode related code paths. 
*/ if (new_op->downcall.resp.getattr.attributes.objtype == - PVFS_TYPE_METAFILE) { - pvfs2_inode->blksize = + ORANGEFS_TYPE_METAFILE) { + orangefs_inode->blksize = new_op->downcall.resp.getattr.attributes.blksize; } else { /* mimic behavior of generic_fillattr() for other types. */ - pvfs2_inode->blksize = (1 << inode->i_blkbits); + orangefs_inode->blksize = (1 << inode->i_blkbits); } @@ -402,8 +402,8 @@ out: gossip_debug(GOSSIP_UTILS_DEBUG, "Getattr on handle %pU, " "fsid %d\n (inode ct = %d) returned %d\n", - &pvfs2_inode->refn.khandle, - pvfs2_inode->refn.fs_id, + &orangefs_inode->refn.khandle, + orangefs_inode->refn.fs_id, (int)atomic_read(&inode->i_count), ret); @@ -412,20 +412,20 @@ out: } /* - * issues a pvfs2 setattr request to make sure the new attribute values + * issues a orangefs setattr request to make sure the new attribute values * take effect if successful. returns 0 on success; -errno otherwise */ -int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr) +int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; int ret; - new_op = op_alloc(PVFS2_VFS_OP_SETATTR); + new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR); if (!new_op) return -ENOMEM; - new_op->upcall.req.setattr.refn = pvfs2_inode->refn; + new_op->upcall.req.setattr.refn = orangefs_inode->refn; ret = copy_attributes_from_inode(inode, &new_op->upcall.req.setattr.attributes, iattr); @@ -438,7 +438,7 @@ int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr) get_interruptible_flag(inode)); gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_inode_setattr: returning %d\n", + "orangefs_inode_setattr: returning %d\n", ret); /* when request is serviced properly, free req op struct */ @@ -449,16 +449,16 @@ int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr) * ctime flags. 
*/ if (ret == 0) { - ClearAtimeFlag(pvfs2_inode); - ClearMtimeFlag(pvfs2_inode); - ClearCtimeFlag(pvfs2_inode); - ClearModeFlag(pvfs2_inode); + ClearAtimeFlag(orangefs_inode); + ClearMtimeFlag(orangefs_inode); + ClearCtimeFlag(orangefs_inode); + ClearModeFlag(orangefs_inode); } return ret; } -int pvfs2_flush_inode(struct inode *inode) +int orangefs_flush_inode(struct inode *inode) { /* * If it is a dirty inode, this function gets called. @@ -472,7 +472,7 @@ int pvfs2_flush_inode(struct inode *inode) int ctime_flag; int atime_flag; int mode_flag; - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); memset(&wbattr, 0, sizeof(wbattr)); @@ -481,14 +481,14 @@ int pvfs2_flush_inode(struct inode *inode) * will prevent multiple processes from all trying to flush the same * inode if they call close() simultaneously */ - mtime_flag = MtimeFlag(pvfs2_inode); - ClearMtimeFlag(pvfs2_inode); - ctime_flag = CtimeFlag(pvfs2_inode); - ClearCtimeFlag(pvfs2_inode); - atime_flag = AtimeFlag(pvfs2_inode); - ClearAtimeFlag(pvfs2_inode); - mode_flag = ModeFlag(pvfs2_inode); - ClearModeFlag(pvfs2_inode); + mtime_flag = MtimeFlag(orangefs_inode); + ClearMtimeFlag(orangefs_inode); + ctime_flag = CtimeFlag(orangefs_inode); + ClearCtimeFlag(orangefs_inode); + atime_flag = AtimeFlag(orangefs_inode); + ClearAtimeFlag(orangefs_inode); + mode_flag = ModeFlag(orangefs_inode); + ClearModeFlag(orangefs_inode); /* -- Lazy atime,mtime and ctime update -- * Note: all times are dictated by server in the new scheme @@ -510,56 +510,56 @@ int pvfs2_flush_inode(struct inode *inode) } gossip_debug(GOSSIP_UTILS_DEBUG, - "*********** pvfs2_flush_inode: %pU " + "*********** orangefs_flush_inode: %pU " "(ia_valid %d)\n", get_khandle_from_ino(inode), wbattr.ia_valid); if (wbattr.ia_valid == 0) { gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_flush_inode skipping setattr()\n"); + "orangefs_flush_inode skipping setattr()\n"); return 0; } 
gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_flush_inode (%pU) writing mode %o\n", + "orangefs_flush_inode (%pU) writing mode %o\n", get_khandle_from_ino(inode), inode->i_mode); - ret = pvfs2_inode_setattr(inode, &wbattr); + ret = orangefs_inode_setattr(inode, &wbattr); return ret; } -int pvfs2_unmount_sb(struct super_block *sb) +int orangefs_unmount_sb(struct super_block *sb) { int ret = -EINVAL; - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_unmount_sb called on sb %p\n", + "orangefs_unmount_sb called on sb %p\n", sb); - new_op = op_alloc(PVFS2_VFS_OP_FS_UMOUNT); + new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT); if (!new_op) return -ENOMEM; - new_op->upcall.req.fs_umount.id = PVFS2_SB(sb)->id; - new_op->upcall.req.fs_umount.fs_id = PVFS2_SB(sb)->fs_id; - strncpy(new_op->upcall.req.fs_umount.pvfs2_config_server, - PVFS2_SB(sb)->devname, - PVFS_MAX_SERVER_ADDR_LEN); + new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id; + new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id; + strncpy(new_op->upcall.req.fs_umount.orangefs_config_server, + ORANGEFS_SB(sb)->devname, + ORANGEFS_MAX_SERVER_ADDR_LEN); gossip_debug(GOSSIP_UTILS_DEBUG, - "Attempting PVFS2 Unmount via host %s\n", - new_op->upcall.req.fs_umount.pvfs2_config_server); + "Attempting ORANGEFS Unmount via host %s\n", + new_op->upcall.req.fs_umount.orangefs_config_server); - ret = service_operation(new_op, "pvfs2_fs_umount", 0); + ret = service_operation(new_op, "orangefs_fs_umount", 0); gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_unmount: got return value of %d\n", ret); + "orangefs_unmount: got return value of %d\n", ret); if (ret) sb = ERR_PTR(ret); else - PVFS2_SB(sb)->mount_pending = 1; + ORANGEFS_SB(sb)->mount_pending = 1; op_release(new_op); return ret; @@ -569,42 +569,42 @@ int pvfs2_unmount_sb(struct super_block *sb) * NOTE: on successful cancellation, be sure to return -EINTR, as * that's the return value the caller 
expects */ -int pvfs2_cancel_op_in_progress(__u64 tag) +int orangefs_cancel_op_in_progress(__u64 tag) { int ret = -EINVAL; - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_cancel_op_in_progress called on tag %llu\n", + "orangefs_cancel_op_in_progress called on tag %llu\n", llu(tag)); - new_op = op_alloc(PVFS2_VFS_OP_CANCEL); + new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL); if (!new_op) return -ENOMEM; new_op->upcall.req.cancel.op_tag = tag; gossip_debug(GOSSIP_UTILS_DEBUG, - "Attempting PVFS2 operation cancellation of tag %llu\n", + "Attempting ORANGEFS operation cancellation of tag %llu\n", llu(new_op->upcall.req.cancel.op_tag)); - ret = service_operation(new_op, "pvfs2_cancel", PVFS2_OP_CANCELLATION); + ret = service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION); gossip_debug(GOSSIP_UTILS_DEBUG, - "pvfs2_cancel_op_in_progress: got return value of %d\n", + "orangefs_cancel_op_in_progress: got return value of %d\n", ret); op_release(new_op); return ret; } -void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op) +void orangefs_op_initialize(struct orangefs_kernel_op_s *op) { if (op) { spin_lock(&op->lock); op->io_completed = 0; - op->upcall.type = PVFS2_VFS_OP_INVALID; - op->downcall.type = PVFS2_VFS_OP_INVALID; + op->upcall.type = ORANGEFS_VFS_OP_INVALID; + op->downcall.type = ORANGEFS_VFS_OP_INVALID; op->downcall.status = -1; op->op_state = OP_VFS_STATE_UNKNOWN; @@ -613,7 +613,7 @@ void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op) } } -void pvfs2_make_bad_inode(struct inode *inode) +void orangefs_make_bad_inode(struct inode *inode) { if (is_root_handle(inode)) { /* @@ -655,10 +655,10 @@ void set_signals(sigset_t *sigset) /* * The following is a very dirty hack that is now a permanent part of the - * PVFS2 protocol. See protocol.h for more error definitions. + * ORANGEFS protocol. See protocol.h for more error definitions. 
*/ -/* The order matches include/pvfs2-types.h in the OrangeFS source. */ +/* The order matches include/orangefs-types.h in the OrangeFS source. */ static int PINT_errno_mapping[] = { 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, @@ -672,7 +672,7 @@ static int PINT_errno_mapping[] = { EACCES, ECONNRESET, ERANGE }; -int pvfs2_normalize_to_errno(__s32 error_code) +int orangefs_normalize_to_errno(__s32 error_code) { __u32 i; @@ -684,24 +684,24 @@ int pvfs2_normalize_to_errno(__s32 error_code) * server. */ } else if (error_code > 0) { - gossip_err("pvfs2: error status receieved.\n"); - gossip_err("pvfs2: assuming error code is inverted.\n"); + gossip_err("orangefs: error status receieved.\n"); + gossip_err("orangefs: assuming error code is inverted.\n"); error_code = -error_code; } /* - * XXX: This is very bad since error codes from PVFS2 may not be + * XXX: This is very bad since error codes from ORANGEFS may not be * suitable for return into userspace. */ /* - * Convert PVFS2 error values into errno values suitable for return + * Convert ORANGEFS error values into errno values suitable for return * from the kernel. 
*/ - if ((-error_code) & PVFS_NON_ERRNO_ERROR_BIT) { + if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) { if (((-error_code) & - (PVFS_ERROR_NUMBER_BITS|PVFS_NON_ERRNO_ERROR_BIT| - PVFS_ERROR_BIT)) == PVFS_ECANCEL) { + (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT| + ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) { /* * cancellation error codes generally correspond to * a timeout from the client's perspective @@ -709,30 +709,30 @@ int pvfs2_normalize_to_errno(__s32 error_code) error_code = -ETIMEDOUT; } else { /* assume a default error code */ - gossip_err("pvfs2: warning: got error code without errno equivalent: %d.\n", error_code); + gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code); error_code = -EINVAL; } - /* Convert PVFS2 encoded errno values into regular errno values. */ - } else if ((-error_code) & PVFS_ERROR_BIT) { - i = (-error_code) & ~(PVFS_ERROR_BIT|PVFS_ERROR_CLASS_BITS); + /* Convert ORANGEFS encoded errno values into regular errno values. */ + } else if ((-error_code) & ORANGEFS_ERROR_BIT) { + i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS); if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping)) error_code = -PINT_errno_mapping[i]; else error_code = -EINVAL; /* - * Only PVFS2 protocol error codes should ever come here. Otherwise + * Only ORANGEFS protocol error codes should ever come here. Otherwise * there is a bug somewhere. 
*/ } else { - gossip_err("pvfs2: pvfs2_normalize_to_errno: got error code which is not from PVFS2.\n"); + gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n"); } return error_code; } #define NUM_MODES 11 -__s32 PVFS_util_translate_mode(int mode) +__s32 ORANGEFS_util_translate_mode(int mode) { int ret = 0; int i = 0; @@ -742,16 +742,16 @@ __s32 PVFS_util_translate_mode(int mode) S_IXUSR, S_IWUSR, S_IRUSR, S_ISGID, S_ISUID }; - static int pvfs2_modes[NUM_MODES] = { - PVFS_O_EXECUTE, PVFS_O_WRITE, PVFS_O_READ, - PVFS_G_EXECUTE, PVFS_G_WRITE, PVFS_G_READ, - PVFS_U_EXECUTE, PVFS_U_WRITE, PVFS_U_READ, - PVFS_G_SGID, PVFS_U_SUID + static int orangefs_modes[NUM_MODES] = { + ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ, + ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ, + ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ, + ORANGEFS_G_SGID, ORANGEFS_U_SUID }; for (i = 0; i < NUM_MODES; i++) if (mode & modes[i]) - ret |= pvfs2_modes[i]; + ret |= orangefs_modes[i]; return ret; } @@ -813,10 +813,10 @@ int orangefs_prepare_cdm_array(char *debug_array_string) (unsigned long long *)&(cdm_array[i].mask1), (unsigned long long *)&(cdm_array[i].mask2)); - if (!strcmp(cdm_array[i].keyword, PVFS2_VERBOSE)) + if (!strcmp(cdm_array[i].keyword, ORANGEFS_VERBOSE)) client_verbose_index = i; - if (!strcmp(cdm_array[i].keyword, PVFS2_ALL)) + if (!strcmp(cdm_array[i].keyword, ORANGEFS_ALL)) client_all_index = i; cds_head = cds_delimiter + 1; @@ -952,7 +952,7 @@ void debug_mask_to_string(void *mask, int type) element_count = num_kmod_keyword_mask_map; } - memset(debug_string, 0, PVFS2_MAX_DEBUG_STRING_LEN); + memset(debug_string, 0, ORANGEFS_MAX_DEBUG_STRING_LEN); /* * Some keywords, like "all" or "verbose", are amalgams of @@ -998,13 +998,13 @@ void do_k_string(void *k_mask, int index) if (*mask & s_kmod_keyword_mask_map[index].mask_val) { if ((strlen(kernel_debug_string) + strlen(s_kmod_keyword_mask_map[index].keyword)) - < 
PVFS2_MAX_DEBUG_STRING_LEN - 1) { + < ORANGEFS_MAX_DEBUG_STRING_LEN - 1) { strcat(kernel_debug_string, s_kmod_keyword_mask_map[index].keyword); strcat(kernel_debug_string, ","); } else { gossip_err("%s: overflow!\n", __func__); - strcpy(kernel_debug_string, PVFS2_ALL); + strcpy(kernel_debug_string, ORANGEFS_ALL); goto out; } } @@ -1025,13 +1025,13 @@ void do_c_string(void *c_mask, int index) (mask->mask2 & cdm_array[index].mask2)) { if ((strlen(client_debug_string) + strlen(cdm_array[index].keyword) + 1) - < PVFS2_MAX_DEBUG_STRING_LEN - 2) { + < ORANGEFS_MAX_DEBUG_STRING_LEN - 2) { strcat(client_debug_string, cdm_array[index].keyword); strcat(client_debug_string, ","); } else { gossip_err("%s: overflow!\n", __func__); - strcpy(client_debug_string, PVFS2_ALL); + strcpy(client_debug_string, ORANGEFS_ALL); goto out; } } @@ -1043,7 +1043,7 @@ int keyword_is_amalgam(char *keyword) { int rc = 0; - if ((!strcmp(keyword, PVFS2_ALL)) || (!strcmp(keyword, PVFS2_VERBOSE))) + if ((!strcmp(keyword, ORANGEFS_ALL)) || (!strcmp(keyword, ORANGEFS_VERBOSE))) rc = 1; return rc; @@ -1067,14 +1067,14 @@ int check_amalgam_keyword(void *mask, int type) if ((c_mask->mask1 == cdm_array[client_all_index].mask1) && (c_mask->mask2 == cdm_array[client_all_index].mask2)) { - strcpy(client_debug_string, PVFS2_ALL); + strcpy(client_debug_string, ORANGEFS_ALL); rc = 1; goto out; } if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) && (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) { - strcpy(client_debug_string, PVFS2_VERBOSE); + strcpy(client_debug_string, ORANGEFS_VERBOSE); rc = 1; goto out; } @@ -1083,7 +1083,7 @@ int check_amalgam_keyword(void *mask, int type) k_mask = (__u64 *) mask; if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) { - strcpy(kernel_debug_string, PVFS2_ALL); + strcpy(kernel_debug_string, ORANGEFS_ALL); rc = 1; goto out; } diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 45db0772a767..c104de1ae5de 100644 --- a/fs/orangefs/super.c 
+++ b/fs/orangefs/super.c @@ -10,13 +10,13 @@ #include -/* a cache for pvfs2-inode objects (i.e. pvfs2 inode private data) */ -static struct kmem_cache *pvfs2_inode_cache; +/* a cache for orangefs-inode objects (i.e. orangefs inode private data) */ +static struct kmem_cache *orangefs_inode_cache; -/* list for storing pvfs2 specific superblocks in use */ -LIST_HEAD(pvfs2_superblocks); +/* list for storing orangefs specific superblocks in use */ +LIST_HEAD(orangefs_superblocks); -DEFINE_SPINLOCK(pvfs2_superblocks_lock); +DEFINE_SPINLOCK(orangefs_superblocks_lock); enum { Opt_intr, @@ -37,7 +37,7 @@ static const match_table_t tokens = { static int parse_mount_options(struct super_block *sb, char *options, int silent) { - struct pvfs2_sb_info_s *pvfs2_sb = PVFS2_SB(sb); + struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(sb); substring_t args[MAX_OPT_ARGS]; char *p; @@ -46,8 +46,8 @@ static int parse_mount_options(struct super_block *sb, char *options, * to zero, ie, initialize to unset. 
*/ sb->s_flags &= ~MS_POSIXACL; - pvfs2_sb->flags &= ~PVFS2_OPT_INTR; - pvfs2_sb->flags &= ~PVFS2_OPT_LOCAL_LOCK; + orangefs_sb->flags &= ~ORANGEFS_OPT_INTR; + orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -61,10 +61,10 @@ static int parse_mount_options(struct super_block *sb, char *options, sb->s_flags |= MS_POSIXACL; break; case Opt_intr: - pvfs2_sb->flags |= PVFS2_OPT_INTR; + orangefs_sb->flags |= ORANGEFS_OPT_INTR; break; case Opt_local_lock: - pvfs2_sb->flags |= PVFS2_OPT_LOCAL_LOCK; + orangefs_sb->flags |= ORANGEFS_OPT_LOCAL_LOCK; break; default: goto fail; @@ -78,24 +78,24 @@ fail: return -EINVAL; } -static void pvfs2_inode_cache_ctor(void *req) +static void orangefs_inode_cache_ctor(void *req) { - struct pvfs2_inode_s *pvfs2_inode = req; + struct orangefs_inode_s *orangefs_inode = req; - inode_init_once(&pvfs2_inode->vfs_inode); - init_rwsem(&pvfs2_inode->xattr_sem); + inode_init_once(&orangefs_inode->vfs_inode); + init_rwsem(&orangefs_inode->xattr_sem); - pvfs2_inode->vfs_inode.i_version = 1; + orangefs_inode->vfs_inode.i_version = 1; } -static struct inode *pvfs2_alloc_inode(struct super_block *sb) +static struct inode *orangefs_alloc_inode(struct super_block *sb) { - struct pvfs2_inode_s *pvfs2_inode; + struct orangefs_inode_s *orangefs_inode; - pvfs2_inode = kmem_cache_alloc(pvfs2_inode_cache, - PVFS2_CACHE_ALLOC_FLAGS); - if (pvfs2_inode == NULL) { - gossip_err("Failed to allocate pvfs2_inode\n"); + orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, + ORANGEFS_CACHE_ALLOC_FLAGS); + if (orangefs_inode == NULL) { + gossip_err("Failed to allocate orangefs_inode\n"); return NULL; } @@ -103,71 +103,71 @@ static struct inode *pvfs2_alloc_inode(struct super_block *sb) * We want to clear everything except for rw_semaphore and the * vfs_inode. 
*/ - memset(&pvfs2_inode->refn.khandle, 0, 16); - pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL; - pvfs2_inode->last_failed_block_index_read = 0; - memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target)); - pvfs2_inode->pinode_flags = 0; + memset(&orangefs_inode->refn.khandle, 0, 16); + orangefs_inode->refn.fs_id = ORANGEFS_FS_ID_NULL; + orangefs_inode->last_failed_block_index_read = 0; + memset(orangefs_inode->link_target, 0, sizeof(orangefs_inode->link_target)); + orangefs_inode->pinode_flags = 0; gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_alloc_inode: allocated %p\n", - &pvfs2_inode->vfs_inode); - return &pvfs2_inode->vfs_inode; + "orangefs_alloc_inode: allocated %p\n", + &orangefs_inode->vfs_inode); + return &orangefs_inode->vfs_inode; } -static void pvfs2_destroy_inode(struct inode *inode) +static void orangefs_destroy_inode(struct inode *inode) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); gossip_debug(GOSSIP_SUPER_DEBUG, "%s: deallocated %p destroying inode %pU\n", - __func__, pvfs2_inode, get_khandle_from_ino(inode)); + __func__, orangefs_inode, get_khandle_from_ino(inode)); - kmem_cache_free(pvfs2_inode_cache, pvfs2_inode); + kmem_cache_free(orangefs_inode_cache, orangefs_inode); } /* * NOTE: information filled in here is typically reflected in the * output of the system command 'df' */ -static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf) +static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf) { int ret = -ENOMEM; - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_kernel_op_s *new_op = NULL; int flags = 0; struct super_block *sb = NULL; sb = dentry->d_sb; gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_statfs: called on sb %p (fs_id is %d)\n", + "orangefs_statfs: called on sb %p (fs_id is %d)\n", sb, - (int)(PVFS2_SB(sb)->fs_id)); + (int)(ORANGEFS_SB(sb)->fs_id)); - new_op = op_alloc(PVFS2_VFS_OP_STATFS); + new_op = 
op_alloc(ORANGEFS_VFS_OP_STATFS); if (!new_op) return ret; - new_op->upcall.req.statfs.fs_id = PVFS2_SB(sb)->fs_id; + new_op->upcall.req.statfs.fs_id = ORANGEFS_SB(sb)->fs_id; - if (PVFS2_SB(sb)->flags & PVFS2_OPT_INTR) - flags = PVFS2_OP_INTERRUPTIBLE; + if (ORANGEFS_SB(sb)->flags & ORANGEFS_OPT_INTR) + flags = ORANGEFS_OP_INTERRUPTIBLE; - ret = service_operation(new_op, "pvfs2_statfs", flags); + ret = service_operation(new_op, "orangefs_statfs", flags); if (new_op->downcall.status < 0) goto out_op_release; gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_statfs: got %ld blocks available | " + "orangefs_statfs: got %ld blocks available | " "%ld blocks total | %ld block size\n", (long)new_op->downcall.resp.statfs.blocks_avail, (long)new_op->downcall.resp.statfs.blocks_total, (long)new_op->downcall.resp.statfs.block_size); buf->f_type = sb->s_magic; - memcpy(&buf->f_fsid, &PVFS2_SB(sb)->fs_id, sizeof(buf->f_fsid)); + memcpy(&buf->f_fsid, &ORANGEFS_SB(sb)->fs_id, sizeof(buf->f_fsid)); buf->f_bsize = new_op->downcall.resp.statfs.block_size; - buf->f_namelen = PVFS2_NAME_LEN; + buf->f_namelen = ORANGEFS_NAME_LEN; buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total; buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail; @@ -178,7 +178,7 @@ static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf) out_op_release: op_release(new_op); - gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_statfs: returning %d\n", ret); + gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_statfs: returning %d\n", ret); return ret; } @@ -186,9 +186,9 @@ out_op_release: * Remount as initiated by VFS layer. We just need to reparse the mount * options, no need to signal pvfs2-client-core about it. 
*/ -static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data) +static int orangefs_remount_fs(struct super_block *sb, int *flags, char *data) { - gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n"); + gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount_fs: called\n"); return parse_mount_options(sb, data, 1); } @@ -207,33 +207,33 @@ static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data) * the client regains all of the mount information from us. * NOTE: this function assumes that the request_mutex is already acquired! */ -int pvfs2_remount(struct super_block *sb) +int orangefs_remount(struct super_block *sb) { - struct pvfs2_kernel_op_s *new_op; + struct orangefs_kernel_op_s *new_op; int ret = -EINVAL; - gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount: called\n"); + gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount: called\n"); - new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT); + new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT); if (!new_op) return -ENOMEM; - strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server, - PVFS2_SB(sb)->devname, - PVFS_MAX_SERVER_ADDR_LEN); + strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, + ORANGEFS_SB(sb)->devname, + ORANGEFS_MAX_SERVER_ADDR_LEN); gossip_debug(GOSSIP_SUPER_DEBUG, - "Attempting PVFS2 Remount via host %s\n", - new_op->upcall.req.fs_mount.pvfs2_config_server); + "Attempting ORANGEFS Remount via host %s\n", + new_op->upcall.req.fs_mount.orangefs_config_server); /* * we assume that the calling function has already acquire the * request_mutex to prevent other operations from bypassing * this one */ - ret = service_operation(new_op, "pvfs2_remount", - PVFS2_OP_PRIORITY | PVFS2_OP_NO_SEMAPHORE); + ret = service_operation(new_op, "orangefs_remount", + ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_SEMAPHORE); gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_remount: mount got return value of %d\n", + "orangefs_remount: mount got return value of %d\n", ret); if (ret == 0) { /* @@ -241,8 +241,8 @@ 
int pvfs2_remount(struct super_block *sb) * short-lived mapping that the system interface uses * to map this superblock to a particular mount entry */ - PVFS2_SB(sb)->id = new_op->downcall.resp.fs_mount.id; - PVFS2_SB(sb)->mount_pending = 0; + ORANGEFS_SB(sb)->id = new_op->downcall.resp.fs_mount.id; + ORANGEFS_SB(sb)->mount_pending = 0; } op_release(new_op); @@ -259,54 +259,54 @@ void fsid_key_table_finalize(void) } /* Called whenever the VFS dirties the inode in response to atime updates */ -static void pvfs2_dirty_inode(struct inode *inode, int flags) +static void orangefs_dirty_inode(struct inode *inode, int flags) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_dirty_inode: %pU\n", + "orangefs_dirty_inode: %pU\n", get_khandle_from_ino(inode)); - SetAtimeFlag(pvfs2_inode); + SetAtimeFlag(orangefs_inode); } -static const struct super_operations pvfs2_s_ops = { - .alloc_inode = pvfs2_alloc_inode, - .destroy_inode = pvfs2_destroy_inode, - .dirty_inode = pvfs2_dirty_inode, +static const struct super_operations orangefs_s_ops = { + .alloc_inode = orangefs_alloc_inode, + .destroy_inode = orangefs_destroy_inode, + .dirty_inode = orangefs_dirty_inode, .drop_inode = generic_delete_inode, - .statfs = pvfs2_statfs, - .remount_fs = pvfs2_remount_fs, + .statfs = orangefs_statfs, + .remount_fs = orangefs_remount_fs, .show_options = generic_show_options, }; -static struct dentry *pvfs2_fh_to_dentry(struct super_block *sb, +static struct dentry *orangefs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - struct pvfs2_object_kref refn; + struct orangefs_object_kref refn; if (fh_len < 5 || fh_type > 2) return NULL; - PVFS_khandle_from(&(refn.khandle), fid->raw, 16); + ORANGEFS_khandle_from(&(refn.khandle), fid->raw, 16); refn.fs_id = (u32) fid->raw[4]; gossip_debug(GOSSIP_SUPER_DEBUG, "fh_to_dentry: handle %pU, fs_id %d\n", 
&refn.khandle, refn.fs_id); - return d_obtain_alias(pvfs2_iget(sb, &refn)); + return d_obtain_alias(orangefs_iget(sb, &refn)); } -static int pvfs2_encode_fh(struct inode *inode, +static int orangefs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { int len = parent ? 10 : 5; int type = 1; - struct pvfs2_object_kref refn; + struct orangefs_object_kref refn; if (*max_len < len) { gossip_lerr("fh buffer is too small for encoding\n"); @@ -315,8 +315,8 @@ static int pvfs2_encode_fh(struct inode *inode, goto out; } - refn = PVFS2_I(inode)->refn; - PVFS_khandle_to(&refn.khandle, fh, 16); + refn = ORANGEFS_I(inode)->refn; + ORANGEFS_khandle_to(&refn.khandle, fh, 16); fh[4] = refn.fs_id; gossip_debug(GOSSIP_SUPER_DEBUG, @@ -326,8 +326,8 @@ static int pvfs2_encode_fh(struct inode *inode, if (parent) { - refn = PVFS2_I(parent)->refn; - PVFS_khandle_to(&refn.khandle, (char *) fh + 20, 16); + refn = ORANGEFS_I(parent)->refn; + ORANGEFS_khandle_to(&refn.khandle, (char *) fh + 20, 16); fh[9] = refn.fs_id; type = 2; @@ -342,30 +342,30 @@ out: return type; } -static struct export_operations pvfs2_export_ops = { - .encode_fh = pvfs2_encode_fh, - .fh_to_dentry = pvfs2_fh_to_dentry, +static struct export_operations orangefs_export_ops = { + .encode_fh = orangefs_encode_fh, + .fh_to_dentry = orangefs_fh_to_dentry, }; -static int pvfs2_fill_sb(struct super_block *sb, - struct pvfs2_fs_mount_response *fs_mount, +static int orangefs_fill_sb(struct super_block *sb, + struct orangefs_fs_mount_response *fs_mount, void *data, int silent) { int ret = -EINVAL; struct inode *root = NULL; struct dentry *root_dentry = NULL; - struct pvfs2_object_kref root_object; + struct orangefs_object_kref root_object; - /* alloc and init our private pvfs2 sb info */ + /* alloc and init our private orangefs sb info */ sb->s_fs_info = - kzalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS); - if (!PVFS2_SB(sb)) + kzalloc(sizeof(struct orangefs_sb_info_s), ORANGEFS_GFP_FLAGS); + 
if (!ORANGEFS_SB(sb)) return -ENOMEM; - PVFS2_SB(sb)->sb = sb; + ORANGEFS_SB(sb)->sb = sb; - PVFS2_SB(sb)->root_khandle = fs_mount->root_khandle; - PVFS2_SB(sb)->fs_id = fs_mount->fs_id; - PVFS2_SB(sb)->id = fs_mount->id; + ORANGEFS_SB(sb)->root_khandle = fs_mount->root_khandle; + ORANGEFS_SB(sb)->fs_id = fs_mount->fs_id; + ORANGEFS_SB(sb)->id = fs_mount->id; if (data) { ret = parse_mount_options(sb, data, silent); @@ -374,23 +374,23 @@ static int pvfs2_fill_sb(struct super_block *sb, } /* Hang the xattr handlers off the superblock */ - sb->s_xattr = pvfs2_xattr_handlers; - sb->s_magic = PVFS2_SUPER_MAGIC; - sb->s_op = &pvfs2_s_ops; - sb->s_d_op = &pvfs2_dentry_operations; + sb->s_xattr = orangefs_xattr_handlers; + sb->s_magic = ORANGEFS_SUPER_MAGIC; + sb->s_op = &orangefs_s_ops; + sb->s_d_op = &orangefs_dentry_operations; - sb->s_blocksize = pvfs_bufmap_size_query(); - sb->s_blocksize_bits = pvfs_bufmap_shift_query(); + sb->s_blocksize = orangefs_bufmap_size_query(); + sb->s_blocksize_bits = orangefs_bufmap_shift_query(); sb->s_maxbytes = MAX_LFS_FILESIZE; - root_object.khandle = PVFS2_SB(sb)->root_khandle; - root_object.fs_id = PVFS2_SB(sb)->fs_id; + root_object.khandle = ORANGEFS_SB(sb)->root_khandle; + root_object.fs_id = ORANGEFS_SB(sb)->fs_id; gossip_debug(GOSSIP_SUPER_DEBUG, "get inode %pU, fsid %d\n", &root_object.khandle, root_object.fs_id); - root = pvfs2_iget(sb, &root_object); + root = orangefs_iget(sb, &root_object); if (IS_ERR(root)) return PTR_ERR(root); @@ -404,23 +404,23 @@ static int pvfs2_fill_sb(struct super_block *sb, if (!root_dentry) return -ENOMEM; - sb->s_export_op = &pvfs2_export_ops; + sb->s_export_op = &orangefs_export_ops; sb->s_root = root_dentry; return 0; } -struct dentry *pvfs2_mount(struct file_system_type *fst, +struct dentry *orangefs_mount(struct file_system_type *fst, int flags, const char *devname, void *data) { int ret = -EINVAL; struct super_block *sb = ERR_PTR(-EINVAL); - struct pvfs2_kernel_op_s *new_op; + struct 
orangefs_kernel_op_s *new_op; struct dentry *d = ERR_PTR(-EINVAL); gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_mount: called with devname %s\n", + "orangefs_mount: called with devname %s\n", devname); if (!devname) { @@ -428,25 +428,25 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, return ERR_PTR(-EINVAL); } - new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT); + new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT); if (!new_op) return ERR_PTR(-ENOMEM); - strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server, + strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname, - PVFS_MAX_SERVER_ADDR_LEN); + ORANGEFS_MAX_SERVER_ADDR_LEN); gossip_debug(GOSSIP_SUPER_DEBUG, - "Attempting PVFS2 Mount via host %s\n", - new_op->upcall.req.fs_mount.pvfs2_config_server); + "Attempting ORANGEFS Mount via host %s\n", + new_op->upcall.req.fs_mount.orangefs_config_server); - ret = service_operation(new_op, "pvfs2_mount", 0); + ret = service_operation(new_op, "orangefs_mount", 0); gossip_debug(GOSSIP_SUPER_DEBUG, - "pvfs2_mount: mount got return value of %d\n", ret); + "orangefs_mount: mount got return value of %d\n", ret); if (ret) goto free_op; - if (new_op->downcall.resp.fs_mount.fs_id == PVFS_FS_ID_NULL) { + if (new_op->downcall.resp.fs_mount.fs_id == ORANGEFS_FS_ID_NULL) { gossip_err("ERROR: Retrieved null fs_id\n"); ret = -EINVAL; goto free_op; @@ -459,7 +459,7 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, goto free_op; } - ret = pvfs2_fill_sb(sb, + ret = orangefs_fill_sb(sb, &new_op->downcall.resp.fs_mount, data, flags & MS_SILENT ? 
1 : 0); @@ -472,25 +472,25 @@ struct dentry *pvfs2_mount(struct file_system_type *fst, * on successful mount, store the devname and data * used */ - strncpy(PVFS2_SB(sb)->devname, + strncpy(ORANGEFS_SB(sb)->devname, devname, - PVFS_MAX_SERVER_ADDR_LEN); + ORANGEFS_MAX_SERVER_ADDR_LEN); /* mount_pending must be cleared */ - PVFS2_SB(sb)->mount_pending = 0; + ORANGEFS_SB(sb)->mount_pending = 0; /* - * finally, add this sb to our list of known pvfs2 + * finally, add this sb to our list of known orangefs * sb's */ - add_pvfs2_sb(sb); + add_orangefs_sb(sb); op_release(new_op); return dget(sb->s_root); free_op: - gossip_err("pvfs2_mount: mount request failed with %d\n", ret); + gossip_err("orangefs_mount: mount request failed with %d\n", ret); if (ret == -EINVAL) { - gossip_err("Ensure that all pvfs2-servers have the same FS configuration files\n"); + gossip_err("Ensure that all orangefs-servers have the same FS configuration files\n"); gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n"); } @@ -499,43 +499,43 @@ free_op: return d; } -void pvfs2_kill_sb(struct super_block *sb) +void orangefs_kill_sb(struct super_block *sb) { - gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_kill_sb: called\n"); + gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n"); /* * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock */ - pvfs2_unmount_sb(sb); + orangefs_unmount_sb(sb); - /* remove the sb from our list of pvfs2 specific sb's */ - remove_pvfs2_sb(sb); + /* remove the sb from our list of orangefs specific sb's */ + remove_orangefs_sb(sb); /* provided sb cleanup */ kill_anon_super(sb); - /* free the pvfs2 superblock private data */ - kfree(PVFS2_SB(sb)); + /* free the orangefs superblock private data */ + kfree(ORANGEFS_SB(sb)); } -int pvfs2_inode_cache_initialize(void) +int orangefs_inode_cache_initialize(void) { - pvfs2_inode_cache = kmem_cache_create("pvfs2_inode_cache", - 
sizeof(struct pvfs2_inode_s), + orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache", + sizeof(struct orangefs_inode_s), 0, - PVFS2_CACHE_CREATE_FLAGS, - pvfs2_inode_cache_ctor); + ORANGEFS_CACHE_CREATE_FLAGS, + orangefs_inode_cache_ctor); - if (!pvfs2_inode_cache) { - gossip_err("Cannot create pvfs2_inode_cache\n"); + if (!orangefs_inode_cache) { + gossip_err("Cannot create orangefs_inode_cache\n"); return -ENOMEM; } return 0; } -int pvfs2_inode_cache_finalize(void) +int orangefs_inode_cache_finalize(void) { - kmem_cache_destroy(pvfs2_inode_cache); + kmem_cache_destroy(orangefs_inode_cache); return 0; } diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c index 2adfceff7730..321f626b190b 100644 --- a/fs/orangefs/symlink.c +++ b/fs/orangefs/symlink.c @@ -8,9 +8,9 @@ #include "pvfs2-kernel.h" #include "pvfs2-bufmap.h" -static const char *pvfs2_follow_link(struct dentry *dentry, void **cookie) +static const char *orangefs_follow_link(struct dentry *dentry, void **cookie) { - char *target = PVFS2_I(dentry->d_inode)->link_target; + char *target = ORANGEFS_I(dentry->d_inode)->link_target; gossip_debug(GOSSIP_INODE_DEBUG, "%s: called on %s (target is %p)\n", @@ -21,11 +21,11 @@ static const char *pvfs2_follow_link(struct dentry *dentry, void **cookie) return target; } -struct inode_operations pvfs2_symlink_inode_operations = { +struct inode_operations orangefs_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = pvfs2_follow_link, - .setattr = pvfs2_setattr, - .getattr = pvfs2_getattr, - .listxattr = pvfs2_listxattr, + .follow_link = orangefs_follow_link, + .setattr = orangefs_setattr, + .getattr = orangefs_getattr, + .listxattr = orangefs_listxattr, .setxattr = generic_setxattr, }; diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h index 0805778a8185..781cbc38523a 100644 --- a/fs/orangefs/upcall.h +++ b/fs/orangefs/upcall.h @@ -12,68 +12,68 @@ * 32-64 bit interaction issues between * client-core and device */ -struct 
pvfs2_io_request_s { +struct orangefs_io_request_s { __s32 async_vfs_io; __s32 buf_index; __s32 count; __s32 __pad1; __s64 offset; - struct pvfs2_object_kref refn; - enum PVFS_io_type io_type; + struct orangefs_object_kref refn; + enum ORANGEFS_io_type io_type; __s32 readahead_size; }; -struct pvfs2_lookup_request_s { +struct orangefs_lookup_request_s { __s32 sym_follow; __s32 __pad1; - struct pvfs2_object_kref parent_refn; - char d_name[PVFS2_NAME_LEN]; + struct orangefs_object_kref parent_refn; + char d_name[ORANGEFS_NAME_LEN]; }; -struct pvfs2_create_request_s { - struct pvfs2_object_kref parent_refn; - struct PVFS_sys_attr_s attributes; - char d_name[PVFS2_NAME_LEN]; +struct orangefs_create_request_s { + struct orangefs_object_kref parent_refn; + struct ORANGEFS_sys_attr_s attributes; + char d_name[ORANGEFS_NAME_LEN]; }; -struct pvfs2_symlink_request_s { - struct pvfs2_object_kref parent_refn; - struct PVFS_sys_attr_s attributes; - char entry_name[PVFS2_NAME_LEN]; - char target[PVFS2_NAME_LEN]; +struct orangefs_symlink_request_s { + struct orangefs_object_kref parent_refn; + struct ORANGEFS_sys_attr_s attributes; + char entry_name[ORANGEFS_NAME_LEN]; + char target[ORANGEFS_NAME_LEN]; }; -struct pvfs2_getattr_request_s { - struct pvfs2_object_kref refn; +struct orangefs_getattr_request_s { + struct orangefs_object_kref refn; __u32 mask; __u32 __pad1; }; -struct pvfs2_setattr_request_s { - struct pvfs2_object_kref refn; - struct PVFS_sys_attr_s attributes; +struct orangefs_setattr_request_s { + struct orangefs_object_kref refn; + struct ORANGEFS_sys_attr_s attributes; }; -struct pvfs2_remove_request_s { - struct pvfs2_object_kref parent_refn; - char d_name[PVFS2_NAME_LEN]; +struct orangefs_remove_request_s { + struct orangefs_object_kref parent_refn; + char d_name[ORANGEFS_NAME_LEN]; }; -struct pvfs2_mkdir_request_s { - struct pvfs2_object_kref parent_refn; - struct PVFS_sys_attr_s attributes; - char d_name[PVFS2_NAME_LEN]; +struct orangefs_mkdir_request_s { + 
struct orangefs_object_kref parent_refn; + struct ORANGEFS_sys_attr_s attributes; + char d_name[ORANGEFS_NAME_LEN]; }; -struct pvfs2_readdir_request_s { - struct pvfs2_object_kref refn; +struct orangefs_readdir_request_s { + struct orangefs_object_kref refn; __u64 token; __s32 max_dirent_count; __s32 buf_index; }; -struct pvfs2_readdirplus_request_s { - struct pvfs2_object_kref refn; +struct orangefs_readdirplus_request_s { + struct orangefs_object_kref refn; __u64 token; __s32 max_dirent_count; __u32 mask; @@ -81,130 +81,130 @@ struct pvfs2_readdirplus_request_s { __s32 __pad1; }; -struct pvfs2_rename_request_s { - struct pvfs2_object_kref old_parent_refn; - struct pvfs2_object_kref new_parent_refn; - char d_old_name[PVFS2_NAME_LEN]; - char d_new_name[PVFS2_NAME_LEN]; +struct orangefs_rename_request_s { + struct orangefs_object_kref old_parent_refn; + struct orangefs_object_kref new_parent_refn; + char d_old_name[ORANGEFS_NAME_LEN]; + char d_new_name[ORANGEFS_NAME_LEN]; }; -struct pvfs2_statfs_request_s { +struct orangefs_statfs_request_s { __s32 fs_id; __s32 __pad1; }; -struct pvfs2_truncate_request_s { - struct pvfs2_object_kref refn; +struct orangefs_truncate_request_s { + struct orangefs_object_kref refn; __s64 size; }; -struct pvfs2_mmap_ra_cache_flush_request_s { - struct pvfs2_object_kref refn; +struct orangefs_mmap_ra_cache_flush_request_s { + struct orangefs_object_kref refn; }; -struct pvfs2_fs_mount_request_s { - char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN]; +struct orangefs_fs_mount_request_s { + char orangefs_config_server[ORANGEFS_MAX_SERVER_ADDR_LEN]; }; -struct pvfs2_fs_umount_request_s { +struct orangefs_fs_umount_request_s { __s32 id; __s32 fs_id; - char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN]; + char orangefs_config_server[ORANGEFS_MAX_SERVER_ADDR_LEN]; }; -struct pvfs2_getxattr_request_s { - struct pvfs2_object_kref refn; +struct orangefs_getxattr_request_s { + struct orangefs_object_kref refn; __s32 key_sz; __s32 __pad1; - char 
key[PVFS_MAX_XATTR_NAMELEN]; + char key[ORANGEFS_MAX_XATTR_NAMELEN]; }; -struct pvfs2_setxattr_request_s { - struct pvfs2_object_kref refn; - struct PVFS_keyval_pair keyval; +struct orangefs_setxattr_request_s { + struct orangefs_object_kref refn; + struct ORANGEFS_keyval_pair keyval; __s32 flags; __s32 __pad1; }; -struct pvfs2_listxattr_request_s { - struct pvfs2_object_kref refn; +struct orangefs_listxattr_request_s { + struct orangefs_object_kref refn; __s32 requested_count; __s32 __pad1; __u64 token; }; -struct pvfs2_removexattr_request_s { - struct pvfs2_object_kref refn; +struct orangefs_removexattr_request_s { + struct orangefs_object_kref refn; __s32 key_sz; __s32 __pad1; - char key[PVFS_MAX_XATTR_NAMELEN]; + char key[ORANGEFS_MAX_XATTR_NAMELEN]; }; -struct pvfs2_op_cancel_s { +struct orangefs_op_cancel_s { __u64 op_tag; }; -struct pvfs2_fsync_request_s { - struct pvfs2_object_kref refn; +struct orangefs_fsync_request_s { + struct orangefs_object_kref refn; }; -enum pvfs2_param_request_type { - PVFS2_PARAM_REQUEST_SET = 1, - PVFS2_PARAM_REQUEST_GET = 2 +enum orangefs_param_request_type { + ORANGEFS_PARAM_REQUEST_SET = 1, + ORANGEFS_PARAM_REQUEST_GET = 2 }; -enum pvfs2_param_request_op { - PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1, - PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2, - PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3, - PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4, - PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5, - PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6, - PVFS2_PARAM_REQUEST_OP_PERF_RESET = 7, - PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8, - PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9, - PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10, - PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11, - PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12, - PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13, - PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14, - PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15, 
- PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG = 16, - PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17, - PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18, - PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19, - PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20, - PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21, - PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22, - PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23, - PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24, - PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25, +enum orangefs_param_request_op { + ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1, + ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2, + ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3, + ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4, + ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5, + ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6, + ORANGEFS_PARAM_REQUEST_OP_PERF_RESET = 7, + ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8, + ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9, + ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10, + ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11, + ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12, + ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13, + ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14, + ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15, + ORANGEFS_PARAM_REQUEST_OP_CLIENT_DEBUG = 16, + ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17, + ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18, + ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19, + ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20, + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21, + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22, + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23, + ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24, + ORANGEFS_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25, }; -struct pvfs2_param_request_s { - enum pvfs2_param_request_type type; - enum 
pvfs2_param_request_op op; +struct orangefs_param_request_s { + enum orangefs_param_request_type type; + enum orangefs_param_request_op op; __s64 value; - char s_value[PVFS2_MAX_DEBUG_STRING_LEN]; + char s_value[ORANGEFS_MAX_DEBUG_STRING_LEN]; }; -enum pvfs2_perf_count_request_type { - PVFS2_PERF_COUNT_REQUEST_ACACHE = 1, - PVFS2_PERF_COUNT_REQUEST_NCACHE = 2, - PVFS2_PERF_COUNT_REQUEST_CAPCACHE = 3, +enum orangefs_perf_count_request_type { + ORANGEFS_PERF_COUNT_REQUEST_ACACHE = 1, + ORANGEFS_PERF_COUNT_REQUEST_NCACHE = 2, + ORANGEFS_PERF_COUNT_REQUEST_CAPCACHE = 3, }; -struct pvfs2_perf_count_request_s { - enum pvfs2_perf_count_request_type type; +struct orangefs_perf_count_request_s { + enum orangefs_perf_count_request_type type; __s32 __pad1; }; -struct pvfs2_fs_key_request_s { +struct orangefs_fs_key_request_s { __s32 fsid; __s32 __pad1; }; -struct pvfs2_upcall_s { +struct orangefs_upcall_s { __s32 type; __u32 uid; __u32 gid; @@ -215,31 +215,31 @@ struct pvfs2_upcall_s { char *trailer_buf; union { - struct pvfs2_io_request_s io; - struct pvfs2_lookup_request_s lookup; - struct pvfs2_create_request_s create; - struct pvfs2_symlink_request_s sym; - struct pvfs2_getattr_request_s getattr; - struct pvfs2_setattr_request_s setattr; - struct pvfs2_remove_request_s remove; - struct pvfs2_mkdir_request_s mkdir; - struct pvfs2_readdir_request_s readdir; - struct pvfs2_readdirplus_request_s readdirplus; - struct pvfs2_rename_request_s rename; - struct pvfs2_statfs_request_s statfs; - struct pvfs2_truncate_request_s truncate; - struct pvfs2_mmap_ra_cache_flush_request_s ra_cache_flush; - struct pvfs2_fs_mount_request_s fs_mount; - struct pvfs2_fs_umount_request_s fs_umount; - struct pvfs2_getxattr_request_s getxattr; - struct pvfs2_setxattr_request_s setxattr; - struct pvfs2_listxattr_request_s listxattr; - struct pvfs2_removexattr_request_s removexattr; - struct pvfs2_op_cancel_s cancel; - struct pvfs2_fsync_request_s fsync; - struct pvfs2_param_request_s param; - struct 
pvfs2_perf_count_request_s perf_count; - struct pvfs2_fs_key_request_s fs_key; + struct orangefs_io_request_s io; + struct orangefs_lookup_request_s lookup; + struct orangefs_create_request_s create; + struct orangefs_symlink_request_s sym; + struct orangefs_getattr_request_s getattr; + struct orangefs_setattr_request_s setattr; + struct orangefs_remove_request_s remove; + struct orangefs_mkdir_request_s mkdir; + struct orangefs_readdir_request_s readdir; + struct orangefs_readdirplus_request_s readdirplus; + struct orangefs_rename_request_s rename; + struct orangefs_statfs_request_s statfs; + struct orangefs_truncate_request_s truncate; + struct orangefs_mmap_ra_cache_flush_request_s ra_cache_flush; + struct orangefs_fs_mount_request_s fs_mount; + struct orangefs_fs_umount_request_s fs_umount; + struct orangefs_getxattr_request_s getxattr; + struct orangefs_setxattr_request_s setxattr; + struct orangefs_listxattr_request_s listxattr; + struct orangefs_removexattr_request_s removexattr; + struct orangefs_op_cancel_s cancel; + struct orangefs_fsync_request_s fsync; + struct orangefs_param_request_s param; + struct orangefs_perf_count_request_s perf_count; + struct orangefs_fs_key_request_s fs_key; } req; }; diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index d7b0eba043ab..cfc8dc59c4eb 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -25,10 +25,10 @@ */ void purge_waiting_ops(void) { - struct pvfs2_kernel_op_s *op; + struct orangefs_kernel_op_s *op; - spin_lock(&pvfs2_request_list_lock); - list_for_each_entry(op, &pvfs2_request_list, list) { + spin_lock(&orangefs_request_list_lock); + list_for_each_entry(op, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), @@ -38,11 +38,11 @@ void purge_waiting_ops(void) spin_unlock(&op->lock); wake_up_interruptible(&op->waitq); } - spin_unlock(&pvfs2_request_list_lock); + spin_unlock(&orangefs_request_list_lock); } /* - 
* submits a PVFS2 operation and waits for it to complete + * submits a ORANGEFS operation and waits for it to complete * * Note op->downcall.status will contain the status of the operation (in * errno format), whether provided by pvfs2-client or a result of failure to @@ -51,7 +51,7 @@ void purge_waiting_ops(void) * * Returns contents of op->downcall.status for convenience */ -int service_operation(struct pvfs2_kernel_op_s *op, +int service_operation(struct orangefs_kernel_op_s *op, const char *op_name, int flags) { @@ -70,30 +70,30 @@ int service_operation(struct pvfs2_kernel_op_s *op, retry_servicing: op->downcall.status = 0; gossip_debug(GOSSIP_WAIT_DEBUG, - "pvfs2: service_operation: %s %p\n", + "orangefs: service_operation: %s %p\n", op_name, op); gossip_debug(GOSSIP_WAIT_DEBUG, - "pvfs2: operation posted by process: %s, pid: %i\n", + "orangefs: operation posted by process: %s, pid: %i\n", current->comm, current->pid); /* mask out signals if this operation is not to be interrupted */ - if (!(flags & PVFS2_OP_INTERRUPTIBLE)) + if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) block_signals(&orig_sigset); - if (!(flags & PVFS2_OP_NO_SEMAPHORE)) { + if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) { ret = mutex_lock_interruptible(&request_mutex); /* * check to see if we were interrupted while waiting for * semaphore */ if (ret < 0) { - if (!(flags & PVFS2_OP_INTERRUPTIBLE)) + if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) set_signals(&orig_sigset); op->downcall.status = ret; gossip_debug(GOSSIP_WAIT_DEBUG, - "pvfs2: service_operation interrupted.\n"); + "orangefs: service_operation interrupted.\n"); return ret; } } @@ -116,7 +116,7 @@ retry_servicing: } /* queue up the operation */ - if (flags & PVFS2_OP_PRIORITY) { + if (flags & ORANGEFS_OP_PRIORITY) { add_priority_op_to_request_list(op); } else { gossip_debug(GOSSIP_WAIT_DEBUG, @@ -125,17 +125,17 @@ retry_servicing: add_op_to_request_list(op); } - if (!(flags & PVFS2_OP_NO_SEMAPHORE)) + if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) 
mutex_unlock(&request_mutex); /* * If we are asked to service an asynchronous operation from * VFS perspective, we are done. */ - if (flags & PVFS2_OP_ASYNC) + if (flags & ORANGEFS_OP_ASYNC) return 0; - if (flags & PVFS2_OP_CANCELLATION) { + if (flags & ORANGEFS_OP_CANCELLATION) { gossip_debug(GOSSIP_WAIT_DEBUG, "%s:" "About to call wait_for_cancellation_downcall.\n", @@ -148,25 +148,25 @@ retry_servicing: if (ret < 0) { /* failed to get matching downcall */ if (ret == -ETIMEDOUT) { - gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n", + gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n", op_name); } op->downcall.status = ret; } else { /* got matching downcall; make sure status is in errno format */ op->downcall.status = - pvfs2_normalize_to_errno(op->downcall.status); + orangefs_normalize_to_errno(op->downcall.status); ret = op->downcall.status; } - if (!(flags & PVFS2_OP_INTERRUPTIBLE)) + if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) set_signals(&orig_sigset); BUG_ON(ret != op->downcall.status); /* retry if operation has not been serviced and if requested */ if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) { gossip_debug(GOSSIP_WAIT_DEBUG, - "pvfs2: tag %llu (%s)" + "orangefs: tag %llu (%s)" " -- operation to be retried (%d attempt)\n", llu(op->tag), op_name, @@ -204,17 +204,17 @@ retry_servicing: * memory system can be initialized. */ spin_lock_irqsave(&op->lock, irqflags); - add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry); + add_wait_queue(&orangefs_bufmap_init_waitq, &wait_entry); spin_unlock_irqrestore(&op->lock, irqflags); set_current_state(TASK_INTERRUPTIBLE); /* - * Wait for pvfs_bufmap_initialize() to wake me up + * Wait for orangefs_bufmap_initialize() to wake me up * within the allotted time. 
*/ ret = schedule_timeout(MSECS_TO_JIFFIES - (1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS)); + (1000 * ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS)); gossip_debug(GOSSIP_WAIT_DEBUG, "Value returned from schedule_timeout:" @@ -225,14 +225,14 @@ retry_servicing: get_bufmap_init()); spin_lock_irqsave(&op->lock, irqflags); - remove_wait_queue(&pvfs2_bufmap_init_waitq, + remove_wait_queue(&orangefs_bufmap_init_waitq, &wait_entry); spin_unlock_irqrestore(&op->lock, irqflags); if (get_bufmap_init() == 0) { gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n", __func__, - PVFS2_BUFMAP_WAIT_TIMEOUT_SECS, + ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS, get_opname_string(op)); return -EIO; } @@ -246,14 +246,14 @@ retry_servicing: } gossip_debug(GOSSIP_WAIT_DEBUG, - "pvfs2: service_operation %s returning: %d for %p.\n", + "orangefs: service_operation %s returning: %d for %p.\n", op_name, ret, op); return ret; } -void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op) +void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* * handle interrupted cases depending on what state we were in when @@ -339,7 +339,7 @@ void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op) * operation since client-core seems to be exiting too often * or if we were interrupted. */ -int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) +int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) { int ret = -EINVAL; DECLARE_WAITQUEUE(wait_entry, current); @@ -386,7 +386,7 @@ int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) op, op->attempts); ret = -ETIMEDOUT; - pvfs2_clean_up_interrupted_operation + orangefs_clean_up_interrupted_operation (op); break; } @@ -403,7 +403,7 @@ int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) * core starts, and so on... */ if (op_state_purged(op)) { - ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ? 
+ ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? -EAGAIN : -EIO; spin_unlock(&op->lock); @@ -415,7 +415,7 @@ int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) llu(op->tag), op, op->attempts); - pvfs2_clean_up_interrupted_operation(op); + orangefs_clean_up_interrupted_operation(op); break; } spin_unlock(&op->lock); @@ -429,7 +429,7 @@ int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) __func__, llu(op->tag), op); - pvfs2_clean_up_interrupted_operation(op); + orangefs_clean_up_interrupted_operation(op); ret = -EINTR; break; } @@ -452,7 +452,7 @@ int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) * cancellation upcall anyway. the only way to exit this is to either * timeout or have the cancellation be serviced properly. */ -int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op) +int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) { int ret = -EINVAL; DECLARE_WAITQUEUE(wait_entry, current); @@ -482,7 +482,7 @@ int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op) __func__, llu(op->tag), op); - pvfs2_clean_up_interrupted_operation(op); + orangefs_clean_up_interrupted_operation(op); ret = -EINTR; break; } @@ -502,7 +502,7 @@ int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op) "%s:*** operation timed out: %p\n", __func__, op); - pvfs2_clean_up_interrupted_operation(op); + orangefs_clean_up_interrupted_operation(op); ret = -ETIMEDOUT; break; } diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index b683daab7425..aeb3c3083591 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -15,8 +15,8 @@ #include -#define SYSTEM_PVFS2_KEY "system.pvfs2." -#define SYSTEM_PVFS2_KEY_LEN 13 +#define SYSTEM_ORANGEFS_KEY "system.pvfs2." +#define SYSTEM_ORANGEFS_KEY_LEN 13 /* * this function returns @@ -24,15 +24,15 @@ * of a listxattr. * 1 if the key corresponding to name is meant to be returned as part of * a listxattr. - * The ones that start SYSTEM_PVFS2_KEY are the ones to avoid printing. 
+ * The ones that start SYSTEM_ORANGEFS_KEY are the ones to avoid printing. */ static int is_reserved_key(const char *key, size_t size) { - if (size < SYSTEM_PVFS2_KEY_LEN) + if (size < SYSTEM_ORANGEFS_KEY_LEN) return 1; - return strncmp(key, SYSTEM_PVFS2_KEY, SYSTEM_PVFS2_KEY_LEN) ? 1 : 0; + return strncmp(key, SYSTEM_ORANGEFS_KEY, SYSTEM_ORANGEFS_KEY_LEN) ? 1 : 0; } static inline int convert_to_internal_xattr_flags(int setxattr_flags) @@ -41,10 +41,10 @@ static inline int convert_to_internal_xattr_flags(int setxattr_flags) if (setxattr_flags & XATTR_REPLACE) { /* Attribute must exist! */ - internal_flag = PVFS_XATTR_REPLACE; + internal_flag = ORANGEFS_XATTR_REPLACE; } else if (setxattr_flags & XATTR_CREATE) { /* Attribute must not exist */ - internal_flag = PVFS_XATTR_CREATE; + internal_flag = ORANGEFS_XATTR_CREATE; } return internal_flag; } @@ -59,11 +59,11 @@ static inline int convert_to_internal_xattr_flags(int setxattr_flags) * unless the key does not exist for the file and/or if * there were errors in fetching the attribute value. 
*/ -ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, +ssize_t orangefs_inode_getxattr(struct inode *inode, const char *prefix, const char *name, void *buffer, size_t size) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op = NULL; ssize_t ret = -ENOMEM; ssize_t length = 0; int fsuid; @@ -74,10 +74,10 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, __func__, prefix, name, size); if (name == NULL || (size > 0 && buffer == NULL)) { - gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n"); + gossip_err("orangefs_inode_getxattr: bogus NULL pointers\n"); return -EINVAL; } - if ((strlen(name) + strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN) { + if ((strlen(name) + strlen(prefix)) >= ORANGEFS_MAX_XATTR_NAMELEN) { gossip_err("Invalid key length (%d)\n", (int)(strlen(name) + strlen(prefix))); return -EINVAL; @@ -94,15 +94,15 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, fsuid, fsgid); - down_read(&pvfs2_inode->xattr_sem); + down_read(&orangefs_inode->xattr_sem); - new_op = op_alloc(PVFS2_VFS_OP_GETXATTR); + new_op = op_alloc(ORANGEFS_VFS_OP_GETXATTR); if (!new_op) goto out_unlock; - new_op->upcall.req.getxattr.refn = pvfs2_inode->refn; + new_op->upcall.req.getxattr.refn = orangefs_inode->refn; ret = snprintf((char *)new_op->upcall.req.getxattr.key, - PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name); + ORANGEFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name); /* * NOTE: Although keys are meant to be NULL terminated textual @@ -111,13 +111,13 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, */ new_op->upcall.req.getxattr.key_sz = ret + 1; - ret = service_operation(new_op, "pvfs2_inode_getxattr", + ret = service_operation(new_op, "orangefs_inode_getxattr", get_interruptible_flag(inode)); if (ret != 0) { if (ret == -ENOENT) { ret = -ENODATA; 
gossip_debug(GOSSIP_XATTR_DEBUG, - "pvfs2_inode_getxattr: inode %pU key %s" + "orangefs_inode_getxattr: inode %pU key %s" " does not exist!\n", get_khandle_from_ino(inode), (char *)new_op->upcall.req.getxattr.key); @@ -149,7 +149,7 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, memset(buffer, 0, size); memcpy(buffer, new_op->downcall.resp.getxattr.val, length); gossip_debug(GOSSIP_XATTR_DEBUG, - "pvfs2_inode_getxattr: inode %pU " + "orangefs_inode_getxattr: inode %pU " "key %s key_sz %d, val_len %d\n", get_khandle_from_ino(inode), (char *)new_op-> @@ -163,44 +163,44 @@ ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, out_release_op: op_release(new_op); out_unlock: - up_read(&pvfs2_inode->xattr_sem); + up_read(&orangefs_inode->xattr_sem); return ret; } -static int pvfs2_inode_removexattr(struct inode *inode, +static int orangefs_inode_removexattr(struct inode *inode, const char *prefix, const char *name, int flags) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op = NULL; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op = NULL; int ret = -ENOMEM; - down_write(&pvfs2_inode->xattr_sem); - new_op = op_alloc(PVFS2_VFS_OP_REMOVEXATTR); + down_write(&orangefs_inode->xattr_sem); + new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR); if (!new_op) goto out_unlock; - new_op->upcall.req.removexattr.refn = pvfs2_inode->refn; + new_op->upcall.req.removexattr.refn = orangefs_inode->refn; /* * NOTE: Although keys are meant to be NULL terminated * textual strings, I am going to explicitly pass the * length just in case we change this later on... */ ret = snprintf((char *)new_op->upcall.req.removexattr.key, - PVFS_MAX_XATTR_NAMELEN, + ORANGEFS_MAX_XATTR_NAMELEN, "%s%s", (prefix ? 
prefix : ""), name); new_op->upcall.req.removexattr.key_sz = ret + 1; gossip_debug(GOSSIP_XATTR_DEBUG, - "pvfs2_inode_removexattr: key %s, key_sz %d\n", + "orangefs_inode_removexattr: key %s, key_sz %d\n", (char *)new_op->upcall.req.removexattr.key, (int)new_op->upcall.req.removexattr.key_sz); ret = service_operation(new_op, - "pvfs2_inode_removexattr", + "orangefs_inode_removexattr", get_interruptible_flag(inode)); if (ret == -ENOENT) { /* @@ -213,11 +213,11 @@ static int pvfs2_inode_removexattr(struct inode *inode, } gossip_debug(GOSSIP_XATTR_DEBUG, - "pvfs2_inode_removexattr: returning %d\n", ret); + "orangefs_inode_removexattr: returning %d\n", ret); op_release(new_op); out_unlock: - up_write(&pvfs2_inode->xattr_sem); + up_write(&orangefs_inode->xattr_sem); return ret; } @@ -227,11 +227,11 @@ out_unlock: * Returns a -ve number on error and 0 on success. Key is text, but value * can be binary! */ -int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, +int orangefs_inode_setxattr(struct inode *inode, const char *prefix, const char *name, const void *value, size_t size, int flags) { - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; int internal_flag = 0; int ret = -ENOMEM; @@ -240,9 +240,9 @@ int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, __func__, prefix, name, size); if (size < 0 || - size >= PVFS_MAX_XATTR_VALUELEN || + size >= ORANGEFS_MAX_XATTR_VALUELEN || flags < 0) { - gossip_err("pvfs2_inode_setxattr: bogus values of size(%d), flags(%d)\n", + gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n", (int)size, flags); return -EINVAL; @@ -250,23 +250,23 @@ int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, if (name == NULL || (size > 0 && value == NULL)) { - gossip_err("pvfs2_inode_setxattr: bogus NULL pointers!\n"); + 
gossip_err("orangefs_inode_setxattr: bogus NULL pointers!\n"); return -EINVAL; } internal_flag = convert_to_internal_xattr_flags(flags); if (prefix) { - if (strlen(name) + strlen(prefix) >= PVFS_MAX_XATTR_NAMELEN) { + if (strlen(name) + strlen(prefix) >= ORANGEFS_MAX_XATTR_NAMELEN) { gossip_err - ("pvfs2_inode_setxattr: bogus key size (%d)\n", + ("orangefs_inode_setxattr: bogus key size (%d)\n", (int)(strlen(name) + strlen(prefix))); return -EINVAL; } } else { - if (strlen(name) >= PVFS_MAX_XATTR_NAMELEN) { + if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) { gossip_err - ("pvfs2_inode_setxattr: bogus key size (%d)\n", + ("orangefs_inode_setxattr: bogus key size (%d)\n", (int)(strlen(name))); return -EINVAL; } @@ -278,7 +278,7 @@ int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, "removing xattr (%s%s)\n", prefix, name); - return pvfs2_inode_removexattr(inode, prefix, name, flags); + return orangefs_inode_removexattr(inode, prefix, name, flags); } gossip_debug(GOSSIP_XATTR_DEBUG, @@ -286,13 +286,13 @@ int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, get_khandle_from_ino(inode), name); - down_write(&pvfs2_inode->xattr_sem); - new_op = op_alloc(PVFS2_VFS_OP_SETXATTR); + down_write(&orangefs_inode->xattr_sem); + new_op = op_alloc(ORANGEFS_VFS_OP_SETXATTR); if (!new_op) goto out_unlock; - new_op->upcall.req.setxattr.refn = pvfs2_inode->refn; + new_op->upcall.req.setxattr.refn = orangefs_inode->refn; new_op->upcall.req.setxattr.flags = internal_flag; /* * NOTE: Although keys are meant to be NULL terminated textual @@ -300,7 +300,7 @@ int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, * case we change this later on... 
*/ ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key, - PVFS_MAX_XATTR_NAMELEN, + ORANGEFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name); new_op->upcall.req.setxattr.keyval.key_sz = ret + 1; @@ -308,24 +308,24 @@ int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, new_op->upcall.req.setxattr.keyval.val_sz = size; gossip_debug(GOSSIP_XATTR_DEBUG, - "pvfs2_inode_setxattr: key %s, key_sz %d " + "orangefs_inode_setxattr: key %s, key_sz %d " " value size %zd\n", (char *)new_op->upcall.req.setxattr.keyval.key, (int)new_op->upcall.req.setxattr.keyval.key_sz, size); ret = service_operation(new_op, - "pvfs2_inode_setxattr", + "orangefs_inode_setxattr", get_interruptible_flag(inode)); gossip_debug(GOSSIP_XATTR_DEBUG, - "pvfs2_inode_setxattr: returning %d\n", + "orangefs_inode_setxattr: returning %d\n", ret); /* when request is serviced properly, free req op struct */ op_release(new_op); out_unlock: - up_write(&pvfs2_inode->xattr_sem); + up_write(&orangefs_inode->xattr_sem); return ret; } @@ -336,12 +336,12 @@ out_unlock: * subsequent memory allocations. Thus our return value is always the size of * all the keys unless there were errors in fetching the keys! 
*/ -ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) +ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct inode *inode = dentry->d_inode; - struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); - struct pvfs2_kernel_op_s *new_op; - __u64 token = PVFS_ITERATE_START; + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; + __u64 token = ORANGEFS_ITERATE_START; ssize_t ret = -ENOMEM; ssize_t total = 0; ssize_t length = 0; @@ -358,8 +358,8 @@ ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) return -EINVAL; } - down_read(&pvfs2_inode->xattr_sem); - new_op = op_alloc(PVFS2_VFS_OP_LISTXATTR); + down_read(&orangefs_inode->xattr_sem); + new_op = op_alloc(ORANGEFS_VFS_OP_LISTXATTR); if (!new_op) goto out_unlock; @@ -368,10 +368,10 @@ ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) try_again: key_size = 0; - new_op->upcall.req.listxattr.refn = pvfs2_inode->refn; + new_op->upcall.req.listxattr.refn = orangefs_inode->refn; new_op->upcall.req.listxattr.token = token; new_op->upcall.req.listxattr.requested_count = - (size == 0) ? 0 : PVFS_MAX_XATTR_LISTLEN; + (size == 0) ? 0 : ORANGEFS_MAX_XATTR_LISTLEN; ret = service_operation(new_op, __func__, get_interruptible_flag(inode)); if (ret != 0) @@ -384,7 +384,7 @@ try_again: * up allocating memory rather than us... */ total = new_op->downcall.resp.listxattr.returned_count * - PVFS_MAX_XATTR_NAMELEN; + ORANGEFS_MAX_XATTR_NAMELEN; goto done; } @@ -429,7 +429,7 @@ try_again: * fetching more keys! 
*/ token = new_op->downcall.resp.listxattr.token; - if (token != PVFS_ITERATE_END) + if (token != ORANGEFS_ITERATE_END) goto try_again; done: @@ -443,88 +443,88 @@ done: if (ret == 0) ret = total; out_unlock: - up_read(&pvfs2_inode->xattr_sem); + up_read(&orangefs_inode->xattr_sem); return ret; } -static int pvfs2_xattr_set_default(const struct xattr_handler *handler, - struct dentry *dentry, - const char *name, - const void *buffer, - size_t size, - int flags) +static int orangefs_xattr_set_default(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags) { - return pvfs2_inode_setxattr(dentry->d_inode, - PVFS2_XATTR_NAME_DEFAULT_PREFIX, + return orangefs_inode_setxattr(dentry->d_inode, + ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, name, buffer, size, flags); } -static int pvfs2_xattr_get_default(const struct xattr_handler *handler, - struct dentry *dentry, - const char *name, - void *buffer, - size_t size) +static int orangefs_xattr_get_default(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + void *buffer, + size_t size) { - return pvfs2_inode_getxattr(dentry->d_inode, - PVFS2_XATTR_NAME_DEFAULT_PREFIX, + return orangefs_inode_getxattr(dentry->d_inode, + ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, name, buffer, size); } -static int pvfs2_xattr_set_trusted(const struct xattr_handler *handler, - struct dentry *dentry, - const char *name, - const void *buffer, - size_t size, - int flags) +static int orangefs_xattr_set_trusted(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags) { - return pvfs2_inode_setxattr(dentry->d_inode, - PVFS2_XATTR_NAME_TRUSTED_PREFIX, + return orangefs_inode_setxattr(dentry->d_inode, + ORANGEFS_XATTR_NAME_TRUSTED_PREFIX, name, buffer, size, flags); } -static int pvfs2_xattr_get_trusted(const struct xattr_handler *handler, - struct dentry *dentry, - const char *name, - 
void *buffer, - size_t size) +static int orangefs_xattr_get_trusted(const struct xattr_handler *handler, + struct dentry *dentry, + const char *name, + void *buffer, + size_t size) { - return pvfs2_inode_getxattr(dentry->d_inode, - PVFS2_XATTR_NAME_TRUSTED_PREFIX, + return orangefs_inode_getxattr(dentry->d_inode, + ORANGEFS_XATTR_NAME_TRUSTED_PREFIX, name, buffer, size); } -static struct xattr_handler pvfs2_xattr_trusted_handler = { - .prefix = PVFS2_XATTR_NAME_TRUSTED_PREFIX, - .get = pvfs2_xattr_get_trusted, - .set = pvfs2_xattr_set_trusted, +static struct xattr_handler orangefs_xattr_trusted_handler = { + .prefix = ORANGEFS_XATTR_NAME_TRUSTED_PREFIX, + .get = orangefs_xattr_get_trusted, + .set = orangefs_xattr_set_trusted, }; -static struct xattr_handler pvfs2_xattr_default_handler = { +static struct xattr_handler orangefs_xattr_default_handler = { /* * NOTE: this is set to be the empty string. * so that all un-prefixed xattrs keys get caught * here! */ - .prefix = PVFS2_XATTR_NAME_DEFAULT_PREFIX, - .get = pvfs2_xattr_get_default, - .set = pvfs2_xattr_set_default, + .prefix = ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, + .get = orangefs_xattr_get_default, + .set = orangefs_xattr_set_default, }; -const struct xattr_handler *pvfs2_xattr_handlers[] = { +const struct xattr_handler *orangefs_xattr_handlers[] = { &posix_acl_access_xattr_handler, &posix_acl_default_xattr_handler, - &pvfs2_xattr_trusted_handler, - &pvfs2_xattr_default_handler, + &orangefs_xattr_trusted_handler, + &orangefs_xattr_default_handler, NULL }; From 575e946125f70c41c2042f10172842c5cab9a09a Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 4 Dec 2015 12:56:14 -0500 Subject: [PATCH 051/174] Orangefs: change pvfs2 filenames to orangefs Also changed references within source files that referred to header files whose names had changed. 
Signed-off-by: Mike Marshall --- Makefile | 2 +- fs/orangefs/Makefile | 8 ++++---- fs/orangefs/acl.c | 4 ++-- fs/orangefs/dcache.c | 2 +- fs/orangefs/{devpvfs2-req.c => devorangefs-req.c} | 6 +++--- fs/orangefs/dir.c | 4 ++-- fs/orangefs/file.c | 6 +++--- fs/orangefs/inode.c | 8 ++++---- fs/orangefs/namei.c | 2 +- fs/orangefs/{pvfs2-bufmap.c => orangefs-bufmap.c} | 4 ++-- fs/orangefs/{pvfs2-bufmap.h => orangefs-bufmap.h} | 0 fs/orangefs/{pvfs2-cache.c => orangefs-cache.c} | 2 +- fs/orangefs/{pvfs2-debug.h => orangefs-debug.h} | 0 fs/orangefs/{pvfs2-debugfs.c => orangefs-debugfs.c} | 4 ++-- fs/orangefs/{pvfs2-debugfs.h => orangefs-debugfs.h} | 0 fs/orangefs/{pvfs2-dev-proto.h => orangefs-dev-proto.h} | 0 fs/orangefs/{pvfs2-kernel.h => orangefs-kernel.h} | 2 +- fs/orangefs/{pvfs2-mod.c => orangefs-mod.c} | 8 ++++---- fs/orangefs/{pvfs2-sysfs.c => orangefs-sysfs.c} | 4 ++-- fs/orangefs/{pvfs2-sysfs.h => orangefs-sysfs.h} | 0 fs/orangefs/{pvfs2-utils.c => orangefs-utils.c} | 6 +++--- fs/orangefs/protocol.h | 6 +++--- fs/orangefs/super.c | 4 ++-- fs/orangefs/symlink.c | 4 ++-- fs/orangefs/waitqueue.c | 4 ++-- fs/orangefs/xattr.c | 4 ++-- 26 files changed, 47 insertions(+), 47 deletions(-) rename fs/orangefs/{devpvfs2-req.c => devorangefs-req.c} (99%) rename fs/orangefs/{pvfs2-bufmap.c => orangefs-bufmap.c} (99%) rename fs/orangefs/{pvfs2-bufmap.h => orangefs-bufmap.h} (100%) rename fs/orangefs/{pvfs2-cache.c => orangefs-cache.c} (99%) rename fs/orangefs/{pvfs2-debug.h => orangefs-debug.h} (100%) rename fs/orangefs/{pvfs2-debugfs.c => orangefs-debugfs.c} (99%) rename fs/orangefs/{pvfs2-debugfs.h => orangefs-debugfs.h} (100%) rename fs/orangefs/{pvfs2-dev-proto.h => orangefs-dev-proto.h} (100%) rename fs/orangefs/{pvfs2-kernel.h => orangefs-kernel.h} (99%) rename fs/orangefs/{pvfs2-mod.c => orangefs-mod.c} (98%) rename fs/orangefs/{pvfs2-sysfs.c => orangefs-sysfs.c} (99%) rename fs/orangefs/{pvfs2-sysfs.h => orangefs-sysfs.h} (100%) rename fs/orangefs/{pvfs2-utils.c => 
orangefs-utils.c} (99%) diff --git a/Makefile b/Makefile index 3a0234f50f36..aca4a73ad069 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc1-o NAME = Blurry Fish Butt # *DOCUMENTATION* diff --git a/fs/orangefs/Makefile b/fs/orangefs/Makefile index 828b36a6916d..a9d6a968fe6d 100644 --- a/fs/orangefs/Makefile +++ b/fs/orangefs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ORANGEFS_FS) += orangefs.o -orangefs-objs := acl.o file.o pvfs2-cache.o pvfs2-utils.o xattr.o dcache.o \ - inode.o pvfs2-sysfs.o pvfs2-mod.o super.o devpvfs2-req.o \ - namei.o symlink.o dir.o pvfs2-bufmap.o \ - pvfs2-debugfs.o waitqueue.o +orangefs-objs := acl.o file.o orangefs-cache.o orangefs-utils.o xattr.o \ + dcache.o inode.o orangefs-sysfs.o orangefs-mod.o super.o \ + devorangefs-req.o namei.o symlink.o dir.o orangefs-bufmap.o \ + orangefs-debugfs.o waitqueue.o diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 5e27d5fcb6bf..03f89dbb2512 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -5,8 +5,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" #include #include diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 12c916fa4c7f..5dd9841df64e 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -9,7 +9,7 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" +#include "orangefs-kernel.h" /* Returns 1 if dentry can still be trusted, else 0. 
*/ static int orangefs_revalidate_lookup(struct dentry *dentry) diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devorangefs-req.c similarity index 99% rename from fs/orangefs/devpvfs2-req.c rename to fs/orangefs/devorangefs-req.c index e18149f0975b..e74938d575d6 100644 --- a/fs/orangefs/devpvfs2-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -8,9 +8,9 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-dev-proto.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-dev-proto.h" +#include "orangefs-bufmap.h" #include #include diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 452d589b9747..c043894fc2bd 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -5,8 +5,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" struct readdir_handle_s { int buffer_index; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index ae5d8ed67ed5..171013ae0036 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -9,8 +9,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" #include #include @@ -186,7 +186,7 @@ populate_shared_memory: } if (ret < 0) { - handle_io_error(); /* defined in pvfs2-kernel.h */ + handle_io_error(); /* * don't write an error to syslog on signaled operation * termination unless we've got debugging turned on, as diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 58e83182d3dc..4724c92b61ac 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -9,8 +9,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" static int read_one_page(struct page *page) { @@ -125,7 +125,7 @@ static int orangefs_releasepage(struct page *page, gfp_t foo) * AIO. Modeled after NFS, they do this too. 
*/ /* - * static ssize_t pvfs2_direct_IO(int rw, + * static ssize_t orangefs_direct_IO(int rw, * struct kiocb *iocb, * struct iov_iter *iter, * loff_t offset) @@ -150,7 +150,7 @@ const struct address_space_operations orangefs_address_operations = { .readpages = orangefs_readpages, .invalidatepage = orangefs_invalidatepage, .releasepage = orangefs_releasepage, -/* .direct_IO = pvfs2_direct_IO */ +/* .direct_IO = orangefs_direct_IO */ }; static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 333c87c8b0f5..63aa1e7fbdb6 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -9,7 +9,7 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" +#include "orangefs-kernel.h" /* * Get a newly allocated inode to go with a negative dentry. diff --git a/fs/orangefs/pvfs2-bufmap.c b/fs/orangefs/orangefs-bufmap.c similarity index 99% rename from fs/orangefs/pvfs2-bufmap.c rename to fs/orangefs/orangefs-bufmap.c index 345287e871b1..c5368d852ee2 100644 --- a/fs/orangefs/pvfs2-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -4,8 +4,8 @@ * See COPYING in top-level directory. 
*/ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq); diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/orangefs-bufmap.h similarity index 100% rename from fs/orangefs/pvfs2-bufmap.h rename to fs/orangefs/orangefs-bufmap.h diff --git a/fs/orangefs/pvfs2-cache.c b/fs/orangefs/orangefs-cache.c similarity index 99% rename from fs/orangefs/pvfs2-cache.c rename to fs/orangefs/orangefs-cache.c index a224831770f4..57e270246e3d 100644 --- a/fs/orangefs/pvfs2-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -5,7 +5,7 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" +#include "orangefs-kernel.h" /* tags assigned to kernel upcall operations */ static __u64 next_tag_value; diff --git a/fs/orangefs/pvfs2-debug.h b/fs/orangefs/orangefs-debug.h similarity index 100% rename from fs/orangefs/pvfs2-debug.h rename to fs/orangefs/orangefs-debug.h diff --git a/fs/orangefs/pvfs2-debugfs.c b/fs/orangefs/orangefs-debugfs.c similarity index 99% rename from fs/orangefs/pvfs2-debugfs.c rename to fs/orangefs/orangefs-debugfs.c index 315dc538b723..7319f1a2ecb8 100644 --- a/fs/orangefs/pvfs2-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -39,9 +39,9 @@ #include -#include "pvfs2-debugfs.h" +#include "orangefs-debugfs.h" #include "protocol.h" -#include "pvfs2-kernel.h" +#include "orangefs-kernel.h" static int orangefs_debug_disabled = 1; diff --git a/fs/orangefs/pvfs2-debugfs.h b/fs/orangefs/orangefs-debugfs.h similarity index 100% rename from fs/orangefs/pvfs2-debugfs.h rename to fs/orangefs/orangefs-debugfs.h diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h similarity index 100% rename from fs/orangefs/pvfs2-dev-proto.h rename to fs/orangefs/orangefs-dev-proto.h diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/orangefs-kernel.h similarity index 99% rename from fs/orangefs/pvfs2-kernel.h rename to 
fs/orangefs/orangefs-kernel.h index 33fcf3bccd2e..840872389fc5 100644 --- a/fs/orangefs/pvfs2-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -53,7 +53,7 @@ #include -#include "pvfs2-dev-proto.h" +#include "orangefs-dev-proto.h" #ifdef ORANGEFS_KERNEL_DEBUG #define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 10 diff --git a/fs/orangefs/pvfs2-mod.c b/fs/orangefs/orangefs-mod.c similarity index 98% rename from fs/orangefs/pvfs2-mod.c rename to fs/orangefs/orangefs-mod.c index d8642908a917..fa2fca6dca7c 100644 --- a/fs/orangefs/pvfs2-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -8,9 +8,9 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-debugfs.h" -#include "pvfs2-sysfs.h" +#include "orangefs-kernel.h" +#include "orangefs-debugfs.h" +#include "orangefs-sysfs.h" /* ORANGEFS_VERSION is a ./configure define */ #ifndef ORANGEFS_VERSION @@ -152,7 +152,7 @@ static int __init orangefs_init(void) if (ret < 0) goto cleanup_inode; - /* Initialize the pvfsdev subsystem. */ + /* Initialize the orangefsdev subsystem. 
*/ ret = orangefs_dev_init(); if (ret < 0) { gossip_err("orangefs: could not initialize device subsystem %d!\n", diff --git a/fs/orangefs/pvfs2-sysfs.c b/fs/orangefs/orangefs-sysfs.c similarity index 99% rename from fs/orangefs/pvfs2-sysfs.c rename to fs/orangefs/orangefs-sysfs.c index f04de2593c79..3d360383ea22 100644 --- a/fs/orangefs/pvfs2-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -99,8 +99,8 @@ #include #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-sysfs.h" +#include "orangefs-kernel.h" +#include "orangefs-sysfs.h" #define ORANGEFS_KOBJ_ID "orangefs" #define ACACHE_KOBJ_ID "acache" diff --git a/fs/orangefs/pvfs2-sysfs.h b/fs/orangefs/orangefs-sysfs.h similarity index 100% rename from fs/orangefs/pvfs2-sysfs.h rename to fs/orangefs/orangefs-sysfs.h diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/orangefs-utils.c similarity index 99% rename from fs/orangefs/pvfs2-utils.c rename to fs/orangefs/orangefs-utils.c index d132c5f712a4..fa2a46521b7a 100644 --- a/fs/orangefs/pvfs2-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -4,9 +4,9 @@ * See COPYING in top-level directory. */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-dev-proto.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-dev-proto.h" +#include "orangefs-bufmap.h" __s32 fsid_of_op(struct orangefs_kernel_op_s *op) { diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 5f10ebc83e76..03bbe7505a35 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -130,7 +130,7 @@ typedef __s64 ORANGEFS_offset; /* Bits 6 - 0 are reserved for the actual error code. */ #define ORANGEFS_ERROR_NUMBER_BITS 0x7f -/* Encoded errno values are decoded by PINT_errno_mapping in pvfs2-utils.c. */ +/* Encoded errno values decoded by PINT_errno_mapping in orangefs-utils.c. */ /* Our own ORANGEFS protocol error codes. 
*/ #define ORANGEFS_ECANCEL (1|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT) @@ -352,7 +352,7 @@ struct dev_mask2_info_s { __s32 ORANGEFS_util_translate_mode(int mode); /* pvfs2-debug.h ************************************************************/ -#include "pvfs2-debug.h" +#include "orangefs-debug.h" /* pvfs2-internal.h *********************************************************/ #define llu(x) (unsigned long long)(x) @@ -402,7 +402,7 @@ enum { /* * describes memory regions to map in the ORANGEFS_DEV_MAP ioctl. - * NOTE: See devpvfs2-req.c for 32 bit compat structure. + * NOTE: See devorangefs-req.c for 32 bit compat structure. * Since this structure has a variable-sized layout that is different * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout * on such systems before servicing ioctl calls from user-space binaries diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index c104de1ae5de..52bc522ea21c 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -5,8 +5,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" #include diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c index 321f626b190b..1b3ae63463dc 100644 --- a/fs/orangefs/symlink.c +++ b/fs/orangefs/symlink.c @@ -5,8 +5,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" static const char *orangefs_follow_link(struct dentry *dentry, void **cookie) { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index cfc8dc59c4eb..c731cbdd5fbd 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -13,8 +13,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" /* * What we do in this function is to walk the list of operations that are diff --git a/fs/orangefs/xattr.c 
b/fs/orangefs/xattr.c index aeb3c3083591..0e4e01602738 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -9,8 +9,8 @@ */ #include "protocol.h" -#include "pvfs2-kernel.h" -#include "pvfs2-bufmap.h" +#include "orangefs-kernel.h" +#include "orangefs-bufmap.h" #include #include From e91f1c804328ae4d46d6509791af27bfe70d917d Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 4 Dec 2015 13:06:55 -0500 Subject: [PATCH 052/174] Orangefs: don't expose internal details of pathname resolution to userspace. Signed-off-by: Mike Marshall --- fs/orangefs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 63aa1e7fbdb6..9f82eb377857 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -113,7 +113,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, if (!new_op) return ERR_PTR(-ENOMEM); - new_op->upcall.req.lookup.sym_follow = flags & LOOKUP_FOLLOW; + new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW; gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n", __FILE__, From 7cec28e91d15ae6d145d8a4ef6cc5ed7c08e70bd Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 11 Dec 2015 10:46:22 -0500 Subject: [PATCH 053/174] Orangefs: don't keep checking stuff in on Friday afternoon. 
Signed-off-by: Mike Marshall --- fs/orangefs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 9f82eb377857..50bc45d02009 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -113,7 +113,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, if (!new_op) return ERR_PTR(-ENOMEM); - new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW; + new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n", __FILE__, From b5e376ea8b20d5d0b48871d2c05916d69da4e604 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 11 Dec 2015 10:50:42 -0500 Subject: [PATCH 054/174] Orangefs: improve comments Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-bufmap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index c5368d852ee2..f7cd18a2a73b 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -499,6 +499,10 @@ void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index) orangefs_bufmap_unref(bufmap); } +/* + * we've been handed an iovec, we need to copy it to + * the shared memory descriptor at "buffer_index". + */ int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, int buffer_index, @@ -527,9 +531,8 @@ int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, } /* - * Iterate through the array of pages containing the bytes from - * a file being read. - * + * we've been handed an iovec, we need to fill it from + * the shared memory descriptor at "buffer_index". 
*/ int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, From b4cf67a2ba1a58dbd2a967c3d877b807fef83b25 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 11 Dec 2015 11:00:12 -0500 Subject: [PATCH 055/174] Orangefs: remove overlooked old-style userspace debug parts Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debug.h | 202 +---------------------------------- 1 file changed, 1 insertion(+), 201 deletions(-) diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h index e6b4baa5e8fb..387db17cde2b 100644 --- a/fs/orangefs/orangefs-debug.h +++ b/fs/orangefs/orangefs-debug.h @@ -18,79 +18,7 @@ #include #endif -#define GOSSIP_NO_DEBUG (__u64)0 -#define GOSSIP_BMI_DEBUG_TCP ((__u64)1 << 0) -#define GOSSIP_BMI_DEBUG_CONTROL ((__u64)1 << 1) -#define GOSSIP_BMI_DEBUG_OFFSETS ((__u64)1 << 2) -#define GOSSIP_BMI_DEBUG_GM ((__u64)1 << 3) -#define GOSSIP_JOB_DEBUG ((__u64)1 << 4) -#define GOSSIP_SERVER_DEBUG ((__u64)1 << 5) -#define GOSSIP_STO_DEBUG_CTRL ((__u64)1 << 6) -#define GOSSIP_STO_DEBUG_DEFAULT ((__u64)1 << 7) -#define GOSSIP_FLOW_DEBUG ((__u64)1 << 8) -#define GOSSIP_BMI_DEBUG_GM_MEM ((__u64)1 << 9) -#define GOSSIP_REQUEST_DEBUG ((__u64)1 << 10) -#define GOSSIP_FLOW_PROTO_DEBUG ((__u64)1 << 11) -#define GOSSIP_NCACHE_DEBUG ((__u64)1 << 12) -#define GOSSIP_CLIENT_DEBUG ((__u64)1 << 13) -#define GOSSIP_REQ_SCHED_DEBUG ((__u64)1 << 14) -#define GOSSIP_ACACHE_DEBUG ((__u64)1 << 15) -#define GOSSIP_TROVE_DEBUG ((__u64)1 << 16) -#define GOSSIP_TROVE_OP_DEBUG ((__u64)1 << 17) -#define GOSSIP_DIST_DEBUG ((__u64)1 << 18) -#define GOSSIP_BMI_DEBUG_IB ((__u64)1 << 19) -#define GOSSIP_DBPF_ATTRCACHE_DEBUG ((__u64)1 << 20) -#define GOSSIP_MMAP_RCACHE_DEBUG ((__u64)1 << 21) -#define GOSSIP_LOOKUP_DEBUG ((__u64)1 << 22) -#define GOSSIP_REMOVE_DEBUG ((__u64)1 << 23) -#define GOSSIP_GETATTR_DEBUG ((__u64)1 << 24) -#define GOSSIP_READDIR_DEBUG ((__u64)1 << 25) -#define GOSSIP_IO_DEBUG ((__u64)1 << 26) -#define
GOSSIP_DBPF_OPEN_CACHE_DEBUG ((__u64)1 << 27) -#define GOSSIP_PERMISSIONS_DEBUG ((__u64)1 << 28) -#define GOSSIP_CANCEL_DEBUG ((__u64)1 << 29) -#define GOSSIP_MSGPAIR_DEBUG ((__u64)1 << 30) -#define GOSSIP_CLIENTCORE_DEBUG ((__u64)1 << 31) -#define GOSSIP_CLIENTCORE_TIMING_DEBUG ((__u64)1 << 32) -#define GOSSIP_SETATTR_DEBUG ((__u64)1 << 33) -#define GOSSIP_MKDIR_DEBUG ((__u64)1 << 34) -#define GOSSIP_VARSTRIP_DEBUG ((__u64)1 << 35) -#define GOSSIP_GETEATTR_DEBUG ((__u64)1 << 36) -#define GOSSIP_SETEATTR_DEBUG ((__u64)1 << 37) -#define GOSSIP_ENDECODE_DEBUG ((__u64)1 << 38) -#define GOSSIP_DELEATTR_DEBUG ((__u64)1 << 39) -#define GOSSIP_ACCESS_DEBUG ((__u64)1 << 40) -#define GOSSIP_ACCESS_DETAIL_DEBUG ((__u64)1 << 41) -#define GOSSIP_LISTEATTR_DEBUG ((__u64)1 << 42) -#define GOSSIP_PERFCOUNTER_DEBUG ((__u64)1 << 43) -#define GOSSIP_STATE_MACHINE_DEBUG ((__u64)1 << 44) -#define GOSSIP_DBPF_KEYVAL_DEBUG ((__u64)1 << 45) -#define GOSSIP_LISTATTR_DEBUG ((__u64)1 << 46) -#define GOSSIP_DBPF_COALESCE_DEBUG ((__u64)1 << 47) -#define GOSSIP_ACCESS_HOSTNAMES ((__u64)1 << 48) -#define GOSSIP_FSCK_DEBUG ((__u64)1 << 49) -#define GOSSIP_BMI_DEBUG_MX ((__u64)1 << 50) -#define GOSSIP_BSTREAM_DEBUG ((__u64)1 << 51) -#define GOSSIP_BMI_DEBUG_PORTALS ((__u64)1 << 52) -#define GOSSIP_USER_DEV_DEBUG ((__u64)1 << 53) -#define GOSSIP_DIRECTIO_DEBUG ((__u64)1 << 54) -#define GOSSIP_MGMT_DEBUG ((__u64)1 << 55) -#define GOSSIP_MIRROR_DEBUG ((__u64)1 << 56) -#define GOSSIP_WIN_CLIENT_DEBUG ((__u64)1 << 57) -#define GOSSIP_SECURITY_DEBUG ((__u64)1 << 58) -#define GOSSIP_USRINT_DEBUG ((__u64)1 << 59) -#define GOSSIP_RCACHE_DEBUG ((__u64)1 << 60) -#define GOSSIP_SECCACHE_DEBUG ((__u64)1 << 61) - -#define GOSSIP_BMI_DEBUG_ALL ((__u64) (GOSSIP_BMI_DEBUG_TCP + \ - GOSSIP_BMI_DEBUG_CONTROL + \ - GOSSIP_BMI_DEBUG_GM + \ - GOSSIP_BMI_DEBUG_OFFSETS + \ - GOSSIP_BMI_DEBUG_IB + \ - GOSSIP_BMI_DEBUG_MX + \ - GOSSIP_BMI_DEBUG_PORTALS)) - -const char *ORANGEFS_debug_get_next_debug_keyword(int position); 
+#define GOSSIP_NO_DEBUG (__u64)0 #define GOSSIP_SUPER_DEBUG ((__u64)1 << 0) #define GOSSIP_INODE_DEBUG ((__u64)1 << 1) @@ -124,131 +52,6 @@ struct __keyword_mask_s { __u64 mask_val; }; -#define __DEBUG_ALL ((__u64) -1) - -/* map all config keywords to pvfs2 debug masks here */ -static struct __keyword_mask_s s_keyword_mask_map[] = { - /* Log trove debugging info. Same as 'trove'. */ - {"storage", GOSSIP_TROVE_DEBUG}, - /* Log trove debugging info. Same as 'storage'. */ - {"trove", GOSSIP_TROVE_DEBUG}, - /* Log trove operations. */ - {"trove_op", GOSSIP_TROVE_OP_DEBUG}, - /* Log network debug info. */ - {"network", GOSSIP_BMI_DEBUG_ALL}, - /* Log server info, including new operations. */ - {"server", GOSSIP_SERVER_DEBUG}, - /* Log client sysint info. This is only useful for the client. */ - {"client", GOSSIP_CLIENT_DEBUG}, - /* Debug the varstrip distribution */ - {"varstrip", GOSSIP_VARSTRIP_DEBUG}, - /* Log job info */ - {"job", GOSSIP_JOB_DEBUG}, - /* Debug PINT_process_request calls. EXTREMELY verbose! */ - {"request", GOSSIP_REQUEST_DEBUG}, - /* Log request scheduler events */ - {"reqsched", GOSSIP_REQ_SCHED_DEBUG}, - /* Log the flow protocol events, including flowproto_multiqueue */ - {"flowproto", GOSSIP_FLOW_PROTO_DEBUG}, - /* Log flow calls */ - {"flow", GOSSIP_FLOW_DEBUG}, - /* Debug the client name cache. Only useful on the client. */ - {"ncache", GOSSIP_NCACHE_DEBUG}, - /* Debug read-ahead cache events. Only useful on the client. */ - {"mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG}, - /* Debug the attribute cache. Only useful on the client. */ - {"acache", GOSSIP_ACACHE_DEBUG}, - /* Log/Debug distribution calls */ - {"distribution", GOSSIP_DIST_DEBUG}, - /* Debug the server-side dbpf attribute cache */ - {"dbpfattrcache", GOSSIP_DBPF_ATTRCACHE_DEBUG}, - /* Debug the client lookup state machine. */ - {"lookup", GOSSIP_LOOKUP_DEBUG}, - /* Debug the client remove state macine. */ - {"remove", GOSSIP_REMOVE_DEBUG}, - /* Debug the server getattr state machine. 
*/ - {"getattr", GOSSIP_GETATTR_DEBUG}, - /* Debug the server setattr state machine. */ - {"setattr", GOSSIP_SETATTR_DEBUG}, - /* vectored getattr server state machine */ - {"listattr", GOSSIP_LISTATTR_DEBUG}, - /* Debug the client and server get ext attributes SM. */ - {"geteattr", GOSSIP_GETEATTR_DEBUG}, - /* Debug the client and server set ext attributes SM. */ - {"seteattr", GOSSIP_SETEATTR_DEBUG}, - /* Debug the readdir operation (client and server) */ - {"readdir", GOSSIP_READDIR_DEBUG}, - /* Debug the mkdir operation (server only) */ - {"mkdir", GOSSIP_MKDIR_DEBUG}, - /* - * Debug the io operation (reads and writes) - * for both the client and server. - */ - {"io", GOSSIP_IO_DEBUG}, - /* Debug the server's open file descriptor cache */ - {"open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG}, - /* Debug permissions checking on the server */ - {"permissions", GOSSIP_PERMISSIONS_DEBUG}, - /* Debug the cancel operation */ - {"cancel", GOSSIP_CANCEL_DEBUG}, - /* Debug the msgpair state machine */ - {"msgpair", GOSSIP_MSGPAIR_DEBUG}, - /* Debug the client core app */ - {"clientcore", GOSSIP_CLIENTCORE_DEBUG}, - /* Debug the client timing state machines (job timeout, etc.) */ - {"clientcore_timing", GOSSIP_CLIENTCORE_TIMING_DEBUG}, - /* network encoding */ - {"endecode", GOSSIP_ENDECODE_DEBUG}, - /* Show server file (metadata) accesses (both modify and read-only). 
*/ - {"access", GOSSIP_ACCESS_DEBUG}, - /* Show more detailed server file accesses */ - {"access_detail", GOSSIP_ACCESS_DETAIL_DEBUG}, - /* Debug the listeattr operation */ - {"listeattr", GOSSIP_LISTEATTR_DEBUG}, - /* Debug the state machine management code */ - {"sm", GOSSIP_STATE_MACHINE_DEBUG}, - /* Debug the metadata dbpf keyval functions */ - {"keyval", GOSSIP_DBPF_KEYVAL_DEBUG}, - /* Debug the metadata sync coalescing code */ - {"coalesce", GOSSIP_DBPF_COALESCE_DEBUG}, - /* Display the hostnames instead of IP addrs in debug output */ - {"access_hostnames", GOSSIP_ACCESS_HOSTNAMES}, - /* Show the client device events */ - {"user_dev", GOSSIP_USER_DEV_DEBUG}, - /* Debug the fsck tool */ - {"fsck", GOSSIP_FSCK_DEBUG}, - /* Debug the bstream code */ - {"bstream", GOSSIP_BSTREAM_DEBUG}, - /* Debug trove in direct io mode */ - {"directio", GOSSIP_DIRECTIO_DEBUG}, - /* Debug direct io thread management */ - {"mgmt", GOSSIP_MGMT_DEBUG}, - /* Debug mirroring process */ - {"mirror", GOSSIP_MIRROR_DEBUG}, - /* Windows client */ - {"win_client", GOSSIP_WIN_CLIENT_DEBUG}, - /* Debug robust security code */ - {"security", GOSSIP_SECURITY_DEBUG}, - /* Capability Cache */ - {"seccache", GOSSIP_SECCACHE_DEBUG}, - /* Client User Interface */ - {"usrint", GOSSIP_USRINT_DEBUG}, - /* rcache */ - {"rcache", GOSSIP_RCACHE_DEBUG}, - /* Everything except the periodic events. Useful for debugging */ - {"verbose", - (__DEBUG_ALL & - ~(GOSSIP_PERFCOUNTER_DEBUG | GOSSIP_STATE_MACHINE_DEBUG | - GOSSIP_ENDECODE_DEBUG | GOSSIP_USER_DEV_DEBUG)) - }, - /* No debug output */ - {"none", GOSSIP_NO_DEBUG}, - /* Everything */ - {"all", __DEBUG_ALL} -}; - -#undef __DEBUG_ALL - /* * Map all kmod keywords to kmod debug masks here. 
Keep this * structure "packed": @@ -286,7 +89,4 @@ static struct __keyword_mask_s s_kmod_keyword_mask_map[] = { static const int num_kmod_keyword_mask_map = (int) (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s)); -static const int num_keyword_mask_map = (int) - (sizeof(s_keyword_mask_map) / sizeof(struct __keyword_mask_s)); - #endif /* __ORANGEFS_DEBUG_H */ From 97f100277cfdcd268f0cf3d83bb6e4d1a345bc80 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 11 Dec 2015 16:45:03 -0500 Subject: [PATCH 056/174] Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general AV dislikes many parts of orangefs_devreq_writev. Besides making orangefs_devreq_writev more easily readable and better commented, this patch makes an effort to address some of the problems: > The 5th is quietly ignored unless trailer_size is positive and > status is zero. If trailer_size > 0 && status == 0, you verify that > the length of the 5th segment is no more than trailer_size and copy > it to vmalloc'ed buffer. Without bothering to zero the rest of that > buffer out. It was just wrong to allow a 5th segment that is not exactly equal to trailer_size. Now that that's fixed, there's nothing to zero out in the vmalloced buffer - it is exactly the right size to hold the 5th segment. > Another API bogosity: when the 5th segment is present, successful writev() > returns the sum of sizes of the first 4. Added size of 5th segment to writev return... > if concatenation of the first 4 segments is longer than > 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine > and proceed with garbage. If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail. > if the 32bit value 4 bytes into op->downcall is zero and 64bit > value following it is non-zero, the latter is interpreted as the size of > trailer data. The latter is what userspace claimed was the length of the trailer data. 
The kernel module now compares it to the trailer iovec's iov_len as a sanity check. > if there's no trailer, the 5th segment (if present) is completely ignored. Whine and fail if there should be no trailer, yet a 5th segment is present. > if vmalloc fails, act as if status (32bit at offset 5 into > op->downcall) had been -ENOMEM and don't look at the 5th segment at all. whine and fail with -ENOMEM. Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 279 +++++++++++++++++++++------------- 1 file changed, 173 insertions(+), 106 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index e74938d575d6..b182b025db86 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -76,11 +76,12 @@ static int orangefs_devreq_open(struct inode *inode, struct file *file) int ret = -EINVAL; if (!(file->f_flags & O_NONBLOCK)) { - gossip_err("orangefs: device cannot be opened in blocking mode\n"); + gossip_err("%s: device cannot be opened in blocking mode\n", + __func__); goto out; } ret = -EACCES; - gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: opening device\n"); + gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n"); mutex_lock(&devreq_mutex); if (open_access_count == 0) { @@ -100,6 +101,7 @@ out: return ret; } +/* Function for read() callers into the device */ static ssize_t orangefs_devreq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) @@ -112,7 +114,8 @@ static ssize_t orangefs_devreq_read(struct file *file, /* We do not support blocking IO. */ if (!(file->f_flags & O_NONBLOCK)) { - gossip_err("orangefs: blocking reads are not supported! 
(pvfs2-client-core bug)\n"); + gossip_err("%s: blocking read from client-core.\n", + __func__); return -EINVAL; } @@ -143,12 +146,16 @@ static ssize_t orangefs_devreq_read(struct file *file, llu(op->tag), get_opname_string(op)); spin_unlock(&op->lock); continue; - /* Skip ops whose filesystem we don't know about unless - * it is being mounted. */ + /* + * Skip ops whose filesystem we don't know about unless + * it is being mounted. + */ /* XXX: is there a better way to detect this? */ } else if (ret == -1 && - !(op->upcall.type == ORANGEFS_VFS_OP_FS_MOUNT || - op->upcall.type == ORANGEFS_VFS_OP_GETATTR)) { + !(op->upcall.type == + ORANGEFS_VFS_OP_FS_MOUNT || + op->upcall.type == + ORANGEFS_VFS_OP_GETATTR)) { gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: skipping op tag %llu %s\n", llu(op->tag), get_opname_string(op)); @@ -237,7 +244,11 @@ error: return -EFAULT; } -/* Function for writev() callers into the device */ +/* + * Function for writev() callers into the device. Readdir related + * operations have an extra iovec containing info about objects + * contained in directories. + */ static ssize_t orangefs_devreq_writev(struct file *file, const struct iovec *iov, size_t count, @@ -247,27 +258,43 @@ static ssize_t orangefs_devreq_writev(struct file *file, void *buffer = NULL; void *ptr = NULL; unsigned long i = 0; - static int max_downsize = MAX_ALIGNED_DEV_REQ_DOWNSIZE; - int ret = 0, num_remaining = max_downsize; - int notrailer_count = 4; /* num elements in iovec without trailer */ + int num_remaining = MAX_ALIGNED_DEV_REQ_DOWNSIZE; + int ret = 0; + /* num elements in iovec without trailer */ + int notrailer_count = 4; + /* + * If there's a trailer, its iov index will be equal to + * notrailer_count. 
+ */ + int trailer_index = notrailer_count; int payload_size = 0; + int returned_downcall_size = 0; __s32 magic = 0; __s32 proto_ver = 0; __u64 tag = 0; ssize_t total_returned_size = 0; - /* Either there is a trailer or there isn't */ + /* + * There will always be at least notrailer_count iovecs, and + * when there's a trailer, one more than notrailer_count. Check + * count's sanity. + */ if (count != notrailer_count && count != (notrailer_count + 1)) { - gossip_err("Error: Number of iov vectors is (%zu) and notrailer count is %d\n", + gossip_err("%s: count:%zu: notrailer_count :%d:\n", + __func__, count, notrailer_count); return -EPROTO; } - buffer = dev_req_alloc(); - if (!buffer) - return -ENOMEM; - ptr = buffer; + + /* Copy the non-trailer iovec data into a device request buffer. */ + buffer = dev_req_alloc(); + if (!buffer) { + gossip_err("%s: dev_req_alloc failed.\n", __func__); + return -ENOMEM; + } + ptr = buffer; for (i = 0; i < notrailer_count; i++) { if (iov[i].iov_len > num_remaining) { gossip_err @@ -292,7 +319,7 @@ static ssize_t orangefs_devreq_writev(struct file *file, * make it 8 bytes big, or use get_unaligned when asigning. */ ptr = buffer; - proto_ver = *((__s32 *) ptr); + proto_ver = *((__s32 *) ptr); /* unused */ ptr += sizeof(__s32); magic = *((__s32 *) ptr); @@ -307,82 +334,114 @@ static ssize_t orangefs_devreq_writev(struct file *file, return -EPROTO; } - /* - * proto_ver = 20902 for 2.9.2 - */ - op = orangefs_devreq_remove_op(tag); if (op) { /* Increase ref count! */ get_op(op); - /* cut off magic and tag from payload size */ - payload_size -= (2 * sizeof(__s32) + sizeof(__u64)); - if (payload_size <= sizeof(struct orangefs_downcall_s)) - /* copy the passed in downcall into the op */ + + /* calculate the size of the returned downcall. 
*/ + returned_downcall_size = + payload_size - (2 * sizeof(__s32) + sizeof(__u64)); + + /* copy the passed in downcall into the op */ + if (returned_downcall_size == + sizeof(struct orangefs_downcall_s)) { memcpy(&op->downcall, ptr, sizeof(struct orangefs_downcall_s)); - else - gossip_debug(GOSSIP_DEV_DEBUG, - "writev: Ignoring %d bytes\n", - payload_size); - - /* Do not allocate needlessly if client-core forgets - * to reset trailer size on op errors. - */ - if (op->downcall.status == 0 && op->downcall.trailer_size > 0) { - __u64 trailer_size = op->downcall.trailer_size; - size_t size; - gossip_debug(GOSSIP_DEV_DEBUG, - "writev: trailer size %ld\n", - (unsigned long)trailer_size); - if (count != (notrailer_count + 1)) { - gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%zu)\n", (unsigned long)trailer_size, count); - dev_req_release(buffer); - put_op(op); - return -EPROTO; - } - size = iov[notrailer_count].iov_len; - if (size > trailer_size) { - gossip_err("writev error: trailer size (%ld) != iov_len (%zd)\n", (unsigned long)trailer_size, size); - dev_req_release(buffer); - put_op(op); - return -EMSGSIZE; - } - /* Allocate a buffer large enough to hold the - * trailer bytes. - */ - op->downcall.trailer_buf = vmalloc(trailer_size); - if (op->downcall.trailer_buf != NULL) { - gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", - op->downcall.trailer_buf); - ret = copy_from_user(op->downcall.trailer_buf, - iov[notrailer_count]. 
- iov_base, - size); - if (ret) { - gossip_err("Failed to copy trailer data from user space\n"); - dev_req_release(buffer); - gossip_debug(GOSSIP_DEV_DEBUG, - "vfree: %p\n", - op->downcall.trailer_buf); - vfree(op->downcall.trailer_buf); - op->downcall.trailer_buf = NULL; - put_op(op); - return -EIO; - } - memset(op->downcall.trailer_buf + size, 0, - trailer_size - size); - } else { - /* Change downcall status */ - op->downcall.status = -ENOMEM; - gossip_err("writev: could not vmalloc for trailer!\n"); - } + } else { + gossip_err("%s: returned downcall size:%d: \n", + __func__, + returned_downcall_size); + dev_req_release(buffer); + put_op(op); + return -EMSGSIZE; } - /* if this operation is an I/O operation and if it was - * initiated on behalf of a *synchronous* VFS I/O operation, - * only then we need to wait + /* Don't tolerate an unexpected trailer iovec. */ + if ((op->downcall.trailer_size == 0) && + (count != notrailer_count)) { + gossip_err("%s: unexpected trailer iovec.\n", + __func__); + dev_req_release(buffer); + put_op(op); + return -EPROTO; + } + + /* Don't consider the trailer if there's a bad status. */ + if (op->downcall.status != 0) + goto no_trailer; + + /* get the trailer if there is one. */ + if (op->downcall.trailer_size == 0) + goto no_trailer; + + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op->downcall.trailer_size %lld\n", + __func__, + op->downcall.trailer_size); + + /* + * Bail if we think think there should be a trailer, but + * there's no iovec for it. + */ + if (count != (notrailer_count + 1)) { + gossip_err("%s: trailer_size:%lld: count:%zu:\n", + __func__, + op->downcall.trailer_size, + count); + dev_req_release(buffer); + put_op(op); + return -EPROTO; + } + + /* Verify that trailer_size is accurate. 
*/ + if (op->downcall.trailer_size != iov[trailer_index].iov_len) { + gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n", + __func__, + op->downcall.trailer_size, + iov[trailer_index].iov_len); + dev_req_release(buffer); + put_op(op); + return -EMSGSIZE; + } + + total_returned_size += iov[trailer_index].iov_len; + + /* + * Allocate a buffer, copy the trailer bytes into it and + * attach it to the downcall. + */ + op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len); + if (op->downcall.trailer_buf != NULL) { + gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", + op->downcall.trailer_buf); + ret = copy_from_user(op->downcall.trailer_buf, + iov[trailer_index].iov_base, + iov[trailer_index].iov_len); + if (ret) { + gossip_err("%s: Failed to copy trailer.\n", + __func__); + dev_req_release(buffer); + gossip_debug(GOSSIP_DEV_DEBUG, + "vfree: %p\n", + op->downcall.trailer_buf); + vfree(op->downcall.trailer_buf); + op->downcall.trailer_buf = NULL; + put_op(op); + return -EIO; + } + } else { + /* Change downcall status */ + gossip_err("writev: could not vmalloc for trailer!\n"); + dev_req_release(buffer); + put_op(op); + return -ENOMEM; + } + +no_trailer: + + /* if this operation is an I/O operation we need to wait * for all data to be copied before we can return to avoid * buffer corruption and races that can pull the buffers * out from under us. @@ -392,12 +451,12 @@ static ssize_t orangefs_devreq_writev(struct file *file, * application reading/writing this device to return until * the buffers are done being used. 
*/ - if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO && - op->upcall.req.io.async_vfs_io == ORANGEFS_VFS_SYNC_IO) { + if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) { int timed_out = 0; DECLARE_WAITQUEUE(wait_entry, current); - /* tell the vfs op waiting on a waitqueue + /* + * tell the vfs op waiting on a waitqueue * that this op is done */ spin_lock(&op->lock); @@ -423,14 +482,18 @@ static ssize_t orangefs_devreq_writev(struct file *file, MSECS_TO_JIFFIES(1000 * op_timeout_secs); if (!schedule_timeout(timeout)) { - gossip_debug(GOSSIP_DEV_DEBUG, "*** I/O wait time is up\n"); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: timed out.\n", + __func__); timed_out = 1; break; } continue; } - gossip_debug(GOSSIP_DEV_DEBUG, "*** signal on I/O wait -- aborting\n"); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: signal on I/O wait, aborting\n", + __func__); break; } @@ -468,6 +531,7 @@ static ssize_t orangefs_devreq_writev(struct file *file, "WARNING: No one's waiting for tag %llu\n", llu(tag)); } + /* put_op? */ dev_req_release(buffer); return total_returned_size; @@ -632,7 +696,8 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) return ret ? 
-EIO : orangefs_bufmap_initialize(&user_desc); case ORANGEFS_DEV_REMOUNT_ALL: gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs_devreq_ioctl: got ORANGEFS_DEV_REMOUNT_ALL\n"); + "%s: got ORANGEFS_DEV_REMOUNT_ALL\n", + __func__); /* * remount all mounted orangefs volumes to regain the lost @@ -647,13 +712,17 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) if (ret < 0) return ret; gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs_devreq_ioctl: priority remount in progress\n"); + "%s: priority remount in progress\n", + __func__); list_for_each(tmp, &orangefs_superblocks) { orangefs_sb = - list_entry(tmp, struct orangefs_sb_info_s, list); + list_entry(tmp, + struct orangefs_sb_info_s, + list); if (orangefs_sb && (orangefs_sb->sb)) { gossip_debug(GOSSIP_DEV_DEBUG, - "Remounting SB %p\n", + "%s: Remounting SB %p\n", + __func__, orangefs_sb); ret = orangefs_remount(orangefs_sb->sb); @@ -661,12 +730,13 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) gossip_debug(GOSSIP_DEV_DEBUG, "SB %p remount failed\n", orangefs_sb); - break; + break; } } } gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs_devreq_ioctl: priority remount complete\n"); + "%s: priority remount complete\n", + __func__); mutex_unlock(&request_mutex); return ret; @@ -704,15 +774,12 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) (void __user *)arg, ORANGEFS_MAX_DEBUG_STRING_LEN); if (ret != 0) { - pr_info("%s: " - "ORANGEFS_DEV_CLIENT_STRING: copy_from_user failed" - "\n", + pr_info("%s: CLIENT_STRING: copy_from_user failed\n", __func__); return -EIO; } - pr_info("%s: client debug array string has been been received." 
- "\n", + pr_info("%s: client debug array string has been received.\n", __func__); if (!help_string_initialized) { @@ -722,9 +789,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) /* build a proper debug help string */ if (orangefs_prepare_debugfs_help_string(0)) { - gossip_err("%s: " - "prepare_debugfs_help_string failed" - "\n", + gossip_err("%s: no debug help string \n", __func__); return -EIO; } @@ -781,15 +846,17 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) debug_mask_to_string(&mask_info.mask_value, mask_info.mask_type); gossip_debug_mask = mask_info.mask_value; - pr_info("ORANGEFS: kernel debug mask has been modified to " + pr_info("%s: kernel debug mask has been modified to " ":%s: :%llx:\n", + __func__, kernel_debug_string, (unsigned long long)gossip_debug_mask); } else if (mask_info.mask_type == CLIENT_MASK) { debug_mask_to_string(&mask_info.mask_value, mask_info.mask_type); - pr_info("ORANGEFS: client debug mask has been modified to" + pr_info("%s: client debug mask has been modified to" ":%s: :%llx:\n", + __func__, client_debug_string, llu(mask_info.mask_value)); } else { From ce6c414e17be602a84b1b34915468f8301ed14a0 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 14 Dec 2015 14:54:46 -0500 Subject: [PATCH 057/174] Orangefs: Don't wait the old-fashioned way. Get rid of add_wait_queue, set_current_state, etc, and use the wait_event() model. 
Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 17 ++++++++-------- fs/orangefs/orangefs-bufmap.c | 15 +++++++------- fs/orangefs/waitqueue.c | 37 +++++++++++------------------------ 3 files changed, 26 insertions(+), 43 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b182b025db86..dc2e2ce7e943 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -432,7 +432,6 @@ static ssize_t orangefs_devreq_writev(struct file *file, return -EIO; } } else { - /* Change downcall status */ gossip_err("writev: could not vmalloc for trailer!\n"); dev_req_release(buffer); put_op(op); @@ -453,7 +452,7 @@ no_trailer: */ if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) { int timed_out = 0; - DECLARE_WAITQUEUE(wait_entry, current); + DEFINE_WAIT(wait_entry); /* * tell the vfs op waiting on a waitqueue @@ -463,14 +462,14 @@ no_trailer: set_op_state_serviced(op); spin_unlock(&op->lock); - add_wait_queue_exclusive(&op->io_completion_waitq, - &wait_entry); wake_up_interruptible(&op->waitq); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_lock(&op->lock); + prepare_to_wait_exclusive( + &op->io_completion_waitq, + &wait_entry, + TASK_INTERRUPTIBLE); if (op->io_completed) { spin_unlock(&op->lock); break; @@ -497,9 +496,9 @@ no_trailer: break; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&op->io_completion_waitq, - &wait_entry); + spin_lock(&op->lock); + finish_wait(&op->io_completion_waitq, &wait_entry); + spin_unlock(&op->lock); /* NOTE: for I/O operations we handle releasing the op * object except in the case of timeout. 
the reason we diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index f7cd18a2a73b..863c6fc8e192 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -333,19 +333,17 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) { int ret = -1; int i = 0; - DECLARE_WAITQUEUE(my_wait, current); - - - add_wait_queue_exclusive(slargs->slot_wq, &my_wait); + DEFINE_WAIT(wait_entry); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - /* * check for available desc, slot_lock is the appropriate * index_lock */ spin_lock(slargs->slot_lock); + prepare_to_wait_exclusive(slargs->slot_wq, + &wait_entry, + TASK_INTERRUPTIBLE); for (i = 0; i < slargs->slot_count; i++) if (slargs->slot_array[i] == 0) { slargs->slot_array[i] = 1; @@ -383,8 +381,9 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) break; } - set_current_state(TASK_RUNNING); - remove_wait_queue(slargs->slot_wq, &my_wait); + spin_lock(slargs->slot_lock); + finish_wait(slargs->slot_wq, &wait_entry); + spin_unlock(slargs->slot_lock); return ret; } diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index c731cbdd5fbd..856a4b48fe23 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -62,7 +62,7 @@ int service_operation(struct orangefs_kernel_op_s *op, /* irqflags and wait_entry are only used IF the client-core aborts */ unsigned long irqflags; - DECLARE_WAITQUEUE(wait_entry, current); + DEFINE_WAIT(wait_entry); op->upcall.tgid = current->tgid; op->upcall.pid = current->pid; @@ -204,11 +204,11 @@ retry_servicing: * memory system can be initialized. 
*/ spin_lock_irqsave(&op->lock, irqflags); - add_wait_queue(&orangefs_bufmap_init_waitq, &wait_entry); + prepare_to_wait(&orangefs_bufmap_init_waitq, + &wait_entry, + TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&op->lock, irqflags); - set_current_state(TASK_INTERRUPTIBLE); - /* * Wait for orangefs_bufmap_initialize() to wake me up * within the allotted time. @@ -225,8 +225,7 @@ retry_servicing: get_bufmap_init()); spin_lock_irqsave(&op->lock, irqflags); - remove_wait_queue(&orangefs_bufmap_init_waitq, - &wait_entry); + finish_wait(&orangefs_bufmap_init_waitq, &wait_entry); spin_unlock_irqrestore(&op->lock, irqflags); if (get_bufmap_init() == 0) { @@ -342,16 +341,11 @@ void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) { int ret = -EINVAL; - DECLARE_WAITQUEUE(wait_entry, current); - - spin_lock(&op->lock); - add_wait_queue(&op->waitq, &wait_entry); - spin_unlock(&op->lock); + DEFINE_WAIT(wait_entry); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_lock(&op->lock); + prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); if (op_state_serviced(op)) { spin_unlock(&op->lock); ret = 0; @@ -434,10 +428,8 @@ int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) break; } - set_current_state(TASK_RUNNING); - spin_lock(&op->lock); - remove_wait_queue(&op->waitq, &wait_entry); + finish_wait(&op->waitq, &wait_entry); spin_unlock(&op->lock); return ret; @@ -455,16 +447,11 @@ int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) { int ret = -EINVAL; - DECLARE_WAITQUEUE(wait_entry, current); - - spin_lock(&op->lock); - add_wait_queue(&op->waitq, &wait_entry); - spin_unlock(&op->lock); + DEFINE_WAIT(wait_entry); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - spin_lock(&op->lock); + prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); if (op_state_serviced(op)) { 
gossip_debug(GOSSIP_WAIT_DEBUG, "%s:op-state is SERVICED.\n", @@ -514,10 +501,8 @@ int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) break; } - set_current_state(TASK_RUNNING); - spin_lock(&op->lock); - remove_wait_queue(&op->waitq, &wait_entry); + finish_wait(&op->waitq, &wait_entry); spin_unlock(&op->lock); gossip_debug(GOSSIP_WAIT_DEBUG, From 90d26aa80861afaee992228d8f0e57cbd06c8d87 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 14 Dec 2015 15:26:38 -0500 Subject: [PATCH 058/174] Orangefs: do not finalize bufmap if it was never initialized. Found by the infant Orangefs fuzzer... Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index dc2e2ce7e943..4d7ab7cb08f7 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -600,7 +600,8 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) __func__); mutex_lock(&devreq_mutex); - orangefs_bufmap_finalize(); + if (get_bufmap_init()) + orangefs_bufmap_finalize(); open_access_count--; @@ -692,7 +693,13 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) (struct ORANGEFS_dev_map_desc __user *) arg, sizeof(struct ORANGEFS_dev_map_desc)); - return ret ? -EIO : orangefs_bufmap_initialize(&user_desc); + if (get_bufmap_init()) { + return -EINVAL; + } else { + return ret ? + -EIO : + orangefs_bufmap_initialize(&user_desc); + } case ORANGEFS_DEV_REMOUNT_ALL: gossip_debug(GOSSIP_DEV_DEBUG, "%s: got ORANGEFS_DEV_REMOUNT_ALL\n", From a762ae6dc5a615f18b5f0fe54a0b8551e02e19d0 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Dec 2015 14:22:06 -0500 Subject: [PATCH 059/174] orangefs: Remove ``aligned'' upcall and downcall length macros. 
There was previously MAX_ALIGNED_DEV_REQ_(UP|DOWN)SIZE macros which evaluated to MAX_DEV_REQ_(UP|DOWN)SIZE+8. As it is unclear what this is for, other than creating a situation where we accept more data than we can parse, it is removed. Signed-off-by: Mike Marshall Signed-off-by: Martin Brandenburg --- fs/orangefs/devorangefs-req.c | 12 ++++++------ fs/orangefs/orangefs-cache.c | 4 ++-- fs/orangefs/orangefs-kernel.h | 18 ------------------ 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 4d7ab7cb08f7..5a9c53eb115f 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -120,10 +120,10 @@ static ssize_t orangefs_devreq_read(struct file *file, } /* - * The client will do an ioctl to find MAX_ALIGNED_DEV_REQ_UPSIZE, then + * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then * always read with that size buffer. */ - if (count != MAX_ALIGNED_DEV_REQ_UPSIZE) { + if (count != MAX_DEV_REQ_UPSIZE) { gossip_err("orangefs: client-core tried to read wrong size\n"); return -EINVAL; } @@ -226,7 +226,7 @@ static ssize_t orangefs_devreq_read(struct file *file, goto error; /* The client only asks to read one size buffer. */ - return MAX_ALIGNED_DEV_REQ_UPSIZE; + return MAX_DEV_REQ_UPSIZE; error: /* * We were unable to copy the op data to the client. 
Put the op back in @@ -258,7 +258,7 @@ static ssize_t orangefs_devreq_writev(struct file *file, void *buffer = NULL; void *ptr = NULL; unsigned long i = 0; - int num_remaining = MAX_ALIGNED_DEV_REQ_DOWNSIZE; + int num_remaining = MAX_DEV_REQ_DOWNSIZE; int ret = 0; /* num elements in iovec without trailer */ int notrailer_count = 4; @@ -661,8 +661,8 @@ static inline long check_ioctl_command(unsigned int command) static long dispatch_ioctl_command(unsigned int command, unsigned long arg) { static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE; - static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE; + static __s32 max_up_size = MAX_DEV_REQ_UPSIZE; + static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE; struct ORANGEFS_dev_map_desc user_desc; int ret = 0; struct dev_mask_info_s mask_info = { 0 }; diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 57e270246e3d..b40f5d74aa97 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -166,7 +166,7 @@ void op_release(struct orangefs_kernel_op_s *orangefs_op) int dev_req_cache_initialize(void) { dev_req_cache = kmem_cache_create("orangefs_devreqcache", - MAX_ALIGNED_DEV_REQ_DOWNSIZE, + MAX_DEV_REQ_DOWNSIZE, 0, ORANGEFS_CACHE_CREATE_FLAGS, NULL); @@ -192,7 +192,7 @@ void *dev_req_alloc(void) if (buffer == NULL) gossip_err("Failed to allocate from dev_req_cache\n"); else - memset(buffer, 0, sizeof(MAX_ALIGNED_DEV_REQ_DOWNSIZE)); + memset(buffer, 0, sizeof(MAX_DEV_REQ_DOWNSIZE)); return buffer; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 840872389fc5..c337a52eb639 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -80,29 +80,11 @@ sizeof(__u64) + sizeof(struct orangefs_upcall_s)) #define MAX_DEV_REQ_DOWNSIZE (2*sizeof(__s32) + \ sizeof(__u64) + sizeof(struct orangefs_downcall_s)) -#define BITS_PER_LONG_DIV_8 (BITS_PER_LONG >> 3) - /* borrowed from irda.h */ 
#ifndef MSECS_TO_JIFFIES #define MSECS_TO_JIFFIES(ms) (((ms)*HZ+999)/1000) #endif -#define MAX_ALIGNED_DEV_REQ_UPSIZE \ - (MAX_DEV_REQ_UPSIZE + \ - ((((MAX_DEV_REQ_UPSIZE / \ - (BITS_PER_LONG_DIV_8)) * \ - (BITS_PER_LONG_DIV_8)) + \ - (BITS_PER_LONG_DIV_8)) - \ - MAX_DEV_REQ_UPSIZE)) - -#define MAX_ALIGNED_DEV_REQ_DOWNSIZE \ - (MAX_DEV_REQ_DOWNSIZE + \ - ((((MAX_DEV_REQ_DOWNSIZE / \ - (BITS_PER_LONG_DIV_8)) * \ - (BITS_PER_LONG_DIV_8)) + \ - (BITS_PER_LONG_DIV_8)) - \ - MAX_DEV_REQ_DOWNSIZE)) - /* * valid orangefs kernel operation states * From bf89f584329c79909ea01c99aeac59ec20b3f524 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Dec 2015 14:45:12 -0500 Subject: [PATCH 060/174] orangefs: Change visibility of several bufmap helpers to static. Signed-off-by: Mike Marshall Signed-off-by: Martin Brandenburg --- fs/orangefs/orangefs-bufmap.c | 12 ++++++++++-- fs/orangefs/orangefs-bufmap.h | 11 ----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 863c6fc8e192..bf8470060c74 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -9,6 +9,14 @@ DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq); +/* used to describe mapped buffers */ +struct orangefs_bufmap_desc { + void *uaddr; /* user space address pointer */ + struct page **page_array; /* array of mapped pages */ + int array_count; /* size of above arrays */ + struct list_head list_link; +}; + static struct orangefs_bufmap { atomic_t refcnt; @@ -50,7 +58,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) kfree(bufmap); } -struct orangefs_bufmap *orangefs_bufmap_ref(void) +static struct orangefs_bufmap *orangefs_bufmap_ref(void) { struct orangefs_bufmap *bufmap = NULL; @@ -63,7 +71,7 @@ struct orangefs_bufmap *orangefs_bufmap_ref(void) return bufmap; } -void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap) +static void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap) { if 
(atomic_dec_and_lock(&bufmap->refcnt, &orangefs_bufmap_lock)) { __orangefs_bufmap = NULL; diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index 91d1755c231a..f652b464b340 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -7,19 +7,8 @@ #ifndef __ORANGEFS_BUFMAP_H #define __ORANGEFS_BUFMAP_H -/* used to describe mapped buffers */ -struct orangefs_bufmap_desc { - void *uaddr; /* user space address pointer */ - struct page **page_array; /* array of mapped pages */ - int array_count; /* size of above arrays */ - struct list_head list_link; -}; - struct orangefs_bufmap; -struct orangefs_bufmap *orangefs_bufmap_ref(void); -void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap); - /* * orangefs_bufmap_size_query is now an inline function because buffer * sizes are not hardcoded From 765a75b34a9d72aca875d85d4dc40945afd2939e Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Dec 2015 14:48:17 -0500 Subject: [PATCH 061/174] orangefs: Remove useless inline qualifier from bufmap functions. All callers were outside of the file these functions were declared in, so nothing was ever inlined anyway. Further this happens before I/O and any speedup by not having to do a call will be dwarfed by the time it takes to talk to the server. Signed-off-by: Mike Marshall Signed-off-by: Martin Brandenburg --- fs/orangefs/orangefs-bufmap.c | 4 ++-- fs/orangefs/orangefs-bufmap.h | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index bf8470060c74..cf3ffb57334b 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -82,7 +82,7 @@ static void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap) } } -inline int orangefs_bufmap_size_query(void) +int orangefs_bufmap_size_query(void) { struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); int size = bufmap ? 
bufmap->desc_size : 0; @@ -91,7 +91,7 @@ inline int orangefs_bufmap_size_query(void) return size; } -inline int orangefs_bufmap_shift_query(void) +int orangefs_bufmap_shift_query(void) { struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); int shift = bufmap ? bufmap->desc_shift : 0; diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index f652b464b340..112ec33a1b86 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -9,10 +9,6 @@ struct orangefs_bufmap; -/* - * orangefs_bufmap_size_query is now an inline function because buffer - * sizes are not hardcoded - */ int orangefs_bufmap_size_query(void); int orangefs_bufmap_shift_query(void); From b09d10df5a39b17ec12ecc0dc230a4d71c8a9996 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Dec 2015 14:54:27 -0500 Subject: [PATCH 062/174] orangefs: Do not unref if there is no bufmap. Signed-off-by: Mike Marshall Signed-off-by: Martin Brandenburg --- fs/orangefs/orangefs-bufmap.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index cf3ffb57334b..888aa28136ee 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -82,21 +82,32 @@ static void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap) } } +/* + * XXX: Can the size and shift change while the caller gives up the + * XXX: lock between calling this and doing something useful? + */ + int orangefs_bufmap_size_query(void) { - struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); - int size = bufmap ? bufmap->desc_size : 0; - - orangefs_bufmap_unref(bufmap); + struct orangefs_bufmap *bufmap; + int size = 0; + bufmap = orangefs_bufmap_ref(); + if (bufmap) { + size = bufmap->desc_size; + orangefs_bufmap_unref(bufmap); + } return size; } int orangefs_bufmap_shift_query(void) { - struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); - int shift = bufmap ? 
bufmap->desc_shift : 0; - - orangefs_bufmap_unref(bufmap); + struct orangefs_bufmap *bufmap; + int shift = 0; + bufmap = orangefs_bufmap_ref(); + if (bufmap) { + shift = bufmap->desc_shift; + orangefs_bufmap_unref(bufmap); + } return shift; } From fef8b67ce6cab8e031285642b841acf5355d6788 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 17 Dec 2015 14:31:24 -0500 Subject: [PATCH 063/174] Orangefs: don't use deprecated xattr defines. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index c337a52eb639..0b7ba0496aa3 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -173,17 +173,8 @@ struct client_debug_mask { #define ORANGEFS_XATTR_INDEX_TRUSTED 3 #define ORANGEFS_XATTR_INDEX_DEFAULT 4 -#if 0 -#ifndef POSIX_ACL_XATTR_ACCESS -#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access" -#endif -#ifndef POSIX_ACL_XATTR_DEFAULT -#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default" -#endif -#endif - -#define ORANGEFS_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS -#define ORANGEFS_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT +#define ORANGEFS_XATTR_NAME_ACL_ACCESS XATTR_NAME_POSIX_ACL_ACCESS +#define ORANGEFS_XATTR_NAME_ACL_DEFAULT XATTR_NAME_POSIX_ACL_DEFAULT #define ORANGEFS_XATTR_NAME_TRUSTED_PREFIX "trusted." 
#define ORANGEFS_XATTR_NAME_DEFAULT_PREFIX "" From 62441fa53bccc69fe344e6b20be0680cca0fbc15 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 17 Dec 2015 16:11:40 -0500 Subject: [PATCH 064/174] Orangefs: validate resp.listxattr.returned_count Signed-off-by: Mike Marshall --- fs/orangefs/xattr.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 0e4e01602738..8e9ccf971486 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -348,6 +348,7 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size) int count_keys = 0; int key_size; int i = 0; + int returned_count = 0; if (size > 0 && buffer == NULL) { gossip_err("%s: bogus NULL pointers\n", __func__); @@ -392,10 +393,19 @@ try_again: if (length == 0) goto done; + returned_count = new_op->downcall.resp.listxattr.returned_count; + if (returned_count < 0 || + returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) { + gossip_err("%s: impossible value for returned_count:%d:\n", + __func__, + returned_count); + goto done; + } + /* * Check to see how much can be fit in the buffer. Fit only whole keys. */ - for (i = 0; i < new_op->downcall.resp.listxattr.returned_count; i++) { + for (i = 0; i < returned_count; i++) { if (total + new_op->downcall.resp.listxattr.lengths[i] > size) goto done; From dde58ca4367a216d51c4e034f1f0195e5923c934 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Tue, 22 Dec 2015 17:13:50 +0100 Subject: [PATCH 065/174] Orangefs: use kzalloc for kmalloc + memset 0 This is an API consolidation only. The use of kmalloc + memset to 0 should be equivalent to kzalloc in this case. 
Signed-off-by: Nicholas Mc Guire Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 7319f1a2ecb8..9eb7972ae10d 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -365,10 +365,9 @@ static ssize_t orangefs_debug_write(struct file *file, count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1; } - buf = kmalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); + buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); if (!buf) goto out; - memset(buf, 0, ORANGEFS_MAX_DEBUG_STRING_LEN); if (copy_from_user(buf, ubuf, count - 1)) { gossip_debug(GOSSIP_DEBUGFS_DEBUG, From eb57bcc2718a9fb5eaea80e0d76e53afac6ae2ec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Dec 2015 14:49:29 +0100 Subject: [PATCH 066/174] orangefs: fix typo in ornagefs_inode_lock Orangefs fails to build on 32-bit SMP configurations due to a simple misspelling, this does the obvious fix. 
Signed-off-by: Arnd Bergmann Fixes: 575e946125f7 ("Orangefs: change pvfs2 filenames to orangefs") Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 0b7ba0496aa3..fe8284045a40 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -770,7 +770,7 @@ do { \ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - ornagefs_inode_lock(inode); + orangefs_inode_lock(inode); #endif i_size_write(inode, i_size); #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) From 4f20854bf7363cc28d4088f2fa954f1a63b5efce Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 28 Dec 2015 11:01:51 -0500 Subject: [PATCH 067/174] Orangefs: don't change EXTRAVERSION Stephen Rothwell taught me how to use CONFIG_LOCALVERSION_AUTO so now I can quit putting random things in EXTRAVERSION for my own use and then forgetting to take them out... Signed-off-by: Mike Marshall --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aca4a73ad069..3a0234f50f36 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc1-o +EXTRAVERSION = -rc1 NAME = Blurry Fish Butt # *DOCUMENTATION* From f987f4c28a0f9a1dee44ca33a29080859b70f24b Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 30 Dec 2015 13:04:28 -0500 Subject: [PATCH 068/174] Orangefs: don't trigger copy_attributes_to_inode from d_revalidate. Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 48 ++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 5dd9841df64e..0419981f773e 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -77,7 +77,7 @@ out_drop: /* * Verify that dentry is valid. 
* - * Should return 1 if dentry can still be trusted, else 0 + * Should return 1 if dentry can still be trusted, else 0. */ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) { @@ -92,49 +92,27 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) /* find inode from dentry */ if (!dentry->d_inode) { - gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n", + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: negative dentry.\n", __func__); - goto invalid_exit; + goto out; } gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode valid.\n", __func__); inode = dentry->d_inode; - /* - * first perform a lookup to make sure that the object not only - * exists, but is still in the expected place in the name space - */ - if (!is_root_handle(inode)) { - if (!orangefs_revalidate_lookup(dentry)) - goto invalid_exit; - } else { - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s: root handle, lookup skipped.\n", - __func__); + /* skip root handle lookups. */ + if (is_root_handle(inode)) { + ret = 1; + goto out; } - /* now perform getattr */ - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s: doing getattr: inode: %p, handle: %pU\n", - __func__, - inode, - get_khandle_from_ino(inode)); - ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s: getattr %s (ret = %d), returning %s for dentry i_count=%d\n", - __func__, - (ret == 0 ? "succeeded" : "failed"), - ret, - (ret == 0 ? "valid" : "INVALID"), - atomic_read(&inode->i_count)); - if (ret != 0) - goto invalid_exit; + /* lookup the object. */ + if (orangefs_revalidate_lookup(dentry)) + ret = 1; - /* dentry is valid! 
*/ - return 1; - -invalid_exit: - return 0; +out: + return ret; } const struct dentry_operations orangefs_dentry_operations = { From acaca36dd94d1bfe381a7425984991a06ba58f53 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 1 Jan 2016 10:01:52 +0100 Subject: [PATCH 069/174] OrangeFS: constify export_operations structures This export_operations structure is never modified, so declare it as const. Most other structures of this type are already const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 52bc522ea21c..bee67b37d805 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -342,7 +342,7 @@ out: return type; } -static struct export_operations orangefs_export_ops = { +static const struct export_operations orangefs_export_ops = { .encode_fh = orangefs_encode_fh, .fh_to_dentry = orangefs_fh_to_dentry, }; From c146c0b87f7cef247744a649f8c1d794d18bfcb7 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sat, 2 Jan 2016 23:04:47 +0100 Subject: [PATCH 070/174] orangefs: Don't pollute global namespace Prefix public functions with "orangefs_" so they don't pollute the global namespace. This fixes a build issue on UML which also has block_signals(). 
Signed-off-by: Richard Weinberger Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 4 ++-- fs/orangefs/orangefs-utils.c | 4 ++-- fs/orangefs/waitqueue.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index fe8284045a40..0c7a9cf9b8ef 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -576,9 +576,9 @@ void orangefs_op_initialize(struct orangefs_kernel_op_s *op); void orangefs_make_bad_inode(struct inode *inode); -void block_signals(sigset_t *); +void orangefs_block_signals(sigset_t *); -void set_signals(sigset_t *); +void orangefs_set_signals(sigset_t *); int orangefs_unmount_sb(struct super_block *sb); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index fa2a46521b7a..f21233201ce3 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -633,7 +633,7 @@ void orangefs_make_bad_inode(struct inode *inode) } /* Block all blockable signals... */ -void block_signals(sigset_t *orig_sigset) +void orangefs_block_signals(sigset_t *orig_sigset) { sigset_t mask; @@ -648,7 +648,7 @@ void block_signals(sigset_t *orig_sigset) } /* set the signal mask to the given template... 
*/ -void set_signals(sigset_t *sigset) +void orangefs_set_signals(sigset_t *sigset) { sigprocmask(SIG_SETMASK, sigset, NULL); } diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 856a4b48fe23..e1415e3882ba 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -80,7 +80,7 @@ retry_servicing: /* mask out signals if this operation is not to be interrupted */ if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) - block_signals(&orig_sigset); + orangefs_block_signals(&orig_sigset); if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) { ret = mutex_lock_interruptible(&request_mutex); @@ -90,7 +90,7 @@ retry_servicing: */ if (ret < 0) { if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) - set_signals(&orig_sigset); + orangefs_set_signals(&orig_sigset); op->downcall.status = ret; gossip_debug(GOSSIP_WAIT_DEBUG, "orangefs: service_operation interrupted.\n"); @@ -160,7 +160,7 @@ retry_servicing: } if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) - set_signals(&orig_sigset); + orangefs_set_signals(&orig_sigset); BUG_ON(ret != op->downcall.status); /* retry if operation has not been serviced and if requested */ From 7d2214858f137ff5fe20d0fdc2823c12b4b54f46 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 4 Jan 2016 15:05:28 -0500 Subject: [PATCH 071/174] orangefs: Fix some more global namespace pollution. This only changes the names of things, so there is no functional change. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 4 ++-- fs/orangefs/dir.c | 23 ++++++++++++++--------- fs/orangefs/orangefs-bufmap.c | 10 +++++----- fs/orangefs/orangefs-bufmap.h | 6 +++--- fs/orangefs/orangefs-dev-proto.h | 32 +++++--------------------------- fs/orangefs/waitqueue.c | 8 ++++---- 6 files changed, 33 insertions(+), 50 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 5a9c53eb115f..e3bb15e344ed 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -600,7 +600,7 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) __func__); mutex_lock(&devreq_mutex); - if (get_bufmap_init()) + if (orangefs_get_bufmap_init()) orangefs_bufmap_finalize(); open_access_count--; @@ -693,7 +693,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) (struct ORANGEFS_dev_map_desc __user *) arg, sizeof(struct ORANGEFS_dev_map_desc)); - if (get_bufmap_init()) { + if (orangefs_get_bufmap_init()) { return -EINVAL; } else { return ret ? diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index c043894fc2bd..58558e37fb8a 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -52,7 +52,11 @@ static long decode_dirents(char *ptr, size_t size, readdir->dirent_array[i].d_name = buf + 4; readdir->dirent_array[i].d_length = len; - len = roundup8(4 + len + 1); + /* + * Round 4 + len + 1, which is the encoded size plus the string + * plus the null terminator to the nearest eight byte boundry. 
+ */ + len = ((4 + len + 1) + 7) & ~7; if (size < len + 16) goto Einval; size -= len + 16; @@ -109,7 +113,7 @@ static void readdir_handle_dtor(struct orangefs_bufmap *bufmap, rhandle->readdir_response.dirent_array = NULL; if (rhandle->buffer_index >= 0) { - readdir_index_put(bufmap, rhandle->buffer_index); + orangefs_readdir_index_put(bufmap, rhandle->buffer_index); rhandle->buffer_index = -1; } if (rhandle->dents_buf) { @@ -175,7 +179,8 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) new_op->uses_shared_memory = 1; new_op->upcall.req.readdir.refn = orangefs_inode->refn; - new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; + new_op->upcall.req.readdir.max_dirent_count = + ORANGEFS_MAX_DIRENT_COUNT_READDIR; gossip_debug(GOSSIP_DIR_DEBUG, "%s: upcall.req.readdir.refn.khandle: %pU\n", @@ -185,9 +190,9 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) new_op->upcall.req.readdir.token = *ptoken; get_new_buffer_index: - ret = readdir_index_get(&bufmap, &buffer_index); + ret = orangefs_readdir_index_get(&bufmap, &buffer_index); if (ret < 0) { - gossip_lerr("orangefs_readdir: readdir_index_get() failure (%d)\n", + gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", ret); goto out_free_op; } @@ -211,14 +216,14 @@ get_new_buffer_index: gossip_debug(GOSSIP_DIR_DEBUG, "%s: Getting new buffer_index for retry of readdir..\n", __func__); - readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(bufmap, buffer_index); goto get_new_buffer_index; } if (ret == -EIO && op_state_purged(new_op)) { gossip_err("%s: Client is down. Aborting readdir call.\n", __func__); - readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(bufmap, buffer_index); goto out_free_op; } @@ -226,7 +231,7 @@ get_new_buffer_index: gossip_debug(GOSSIP_DIR_DEBUG, "Readdir request failed. 
Status:%d\n", new_op->downcall.status); - readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(bufmap, buffer_index); if (ret >= 0) ret = new_op->downcall.status; goto out_free_op; @@ -241,7 +246,7 @@ get_new_buffer_index: gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", ret); ret = bytes_decoded; - readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(bufmap, buffer_index); goto out_free_op; } diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 888aa28136ee..15baecb8094d 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -115,14 +115,14 @@ static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq); static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq); /* - * get_bufmap_init + * orangefs_get_bufmap_init * * If bufmap_init is 1, then the shared memory system, including the * buffer_index_array, is available. Otherwise, it is not. * * returns the value of bufmap_init */ -int get_bufmap_init(void) +int orangefs_get_bufmap_init(void) { return __orangefs_bufmap ? 1 : 0; } @@ -473,7 +473,7 @@ void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index) } /* - * readdir_index_get() + * orangefs_readdir_index_get() * * gets a free descriptor, will sleep until one becomes * available if necessary. 
@@ -483,7 +483,7 @@ void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index) * * returns 0 on success, -errno on failure */ -int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) +int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) { struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); struct slot_args slargs; @@ -505,7 +505,7 @@ int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) return ret; } -void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index) +void orangefs_readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index) { struct slot_args slargs; diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index 112ec33a1b86..dff55e2857c5 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -15,7 +15,7 @@ int orangefs_bufmap_shift_query(void); int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc); -int get_bufmap_init(void); +int orangefs_get_bufmap_init(void); void orangefs_bufmap_finalize(void); @@ -23,9 +23,9 @@ int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index); void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index); -int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index); +int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index); -void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index); +void orangefs_readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index); int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h index dc1951dd7045..5a8725a88eac 100644 --- a/fs/orangefs/orangefs-dev-proto.h +++ b/fs/orangefs/orangefs-dev-proto.h @@ -51,35 +51,13 @@ #define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800 /* - * MAX_DIRENT_COUNT cannot be larger than 
ORANGEFS_REQ_LIMIT_LISTATTR. - * The value of ORANGEFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 - * to accomodate an attribute object with mirrored handles. - * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and - * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr - * but readdirplus might. -*/ -#define MAX_DIRENT_COUNT_READDIR 0x00000060 -#define MAX_DIRENT_COUNT_READDIRPLUS 0x0000003C + * The maximum number of directory entries in a single request is 96. + * XXX: Why can this not be higher. The client-side code can handle up to 512. + * XXX: What happens if we expect more than the client can return? + */ +#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 96 #include "upcall.h" #include "downcall.h" -/* - * These macros differ from proto macros in that they don't do any - * byte-swappings and are used to ensure that kernel-clientcore interactions - * don't cause any unaligned accesses etc on 64 bit machines - */ -#ifndef roundup4 -#define roundup4(x) (((x)+3) & ~3) -#endif - -#ifndef roundup8 -#define roundup8(x) (((x)+7) & ~7) -#endif - -struct read_write_x { - __s64 off; - __s64 len; -}; - #endif diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index e1415e3882ba..751c3c640a52 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -180,7 +180,7 @@ retry_servicing: goto retry_servicing; /* op uses shared memory */ - if (get_bufmap_init() == 0) { + if (orangefs_get_bufmap_init() == 0) { /* * This operation uses the shared memory system AND * the system is not yet ready. This situation occurs @@ -194,7 +194,7 @@ retry_servicing: "Client core in-service status(%d).\n", is_daemon_in_service()); gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n", - get_bufmap_init()); + orangefs_get_bufmap_init()); gossip_debug(GOSSIP_WAIT_DEBUG, "operation's status is 0x%0x.\n", op->op_state); @@ -222,13 +222,13 @@ retry_servicing: ret); gossip_debug(GOSSIP_WAIT_DEBUG, "Is shared memory available? 
(%d).\n", - get_bufmap_init()); + orangefs_get_bufmap_init()); spin_lock_irqsave(&op->lock, irqflags); finish_wait(&orangefs_bufmap_init_waitq, &wait_entry); spin_unlock_irqrestore(&op->lock, irqflags); - if (get_bufmap_init() == 0) { + if (orangefs_get_bufmap_init() == 0) { gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n", __func__, ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS, From 85096169860199f506ae18acd222d1d870f1ee96 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 4 Jan 2016 16:38:00 -0500 Subject: [PATCH 072/174] Orangefs: add orangefs to MAINTAINERS Signed-off-by: Mike Marshall --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e9caa4b28828..4848bd58b478 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7953,6 +7953,14 @@ S: Supported F: fs/overlayfs/ F: Documentation/filesystems/overlayfs.txt +ORANGEFS FILESYSTEM +M: Mike Marshall +L: pvfs2-developers@beowulf-underground.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git +S: Supported +F: fs/orangefs/ +F: Documentation/filesystems/orangefs.txt + P54 WIRELESS DRIVER M: Christian Lamparter L: linux-wireless@vger.kernel.org From b3ae4755f561cffd23192cd1fb9648649aa7405e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 13 Jan 2016 11:18:12 -0500 Subject: [PATCH 073/174] Orangefs: implement .write_iter Until now, orangefs_devreq_write_iter has just been a wrapper for the old-fashioned orangefs_devreq_writev... 
linux would call .write_iter with "struct kiocb *iocb" and "struct iov_iter *iter" and .write_iter would just: return pvfs2_devreq_writev(iocb->ki_filp, iter->iov, iter->nr_segs, &iocb->ki_pos); Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 524 +++++++++++++++------------------- 1 file changed, 230 insertions(+), 294 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index e3bb15e344ed..0f01d3edfc2b 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -245,304 +245,240 @@ error: } /* - * Function for writev() callers into the device. Readdir related - * operations have an extra iovec containing info about objects - * contained in directories. + * Function for writev() callers into the device. + * + * Userspace should have written: + * - __u32 version + * - __u32 magic + * - __u64 tag + * - struct orangefs_downcall_s + * - trailer buffer (in the case of READDIR operations) */ -static ssize_t orangefs_devreq_writev(struct file *file, - const struct iovec *iov, - size_t count, - loff_t *offset) -{ - struct orangefs_kernel_op_s *op = NULL; - void *buffer = NULL; - void *ptr = NULL; - unsigned long i = 0; - int num_remaining = MAX_DEV_REQ_DOWNSIZE; - int ret = 0; - /* num elements in iovec without trailer */ - int notrailer_count = 4; - /* - * If there's a trailer, its iov index will be equal to - * notrailer_count. - */ - int trailer_index = notrailer_count; - int payload_size = 0; - int returned_downcall_size = 0; - __s32 magic = 0; - __s32 proto_ver = 0; - __u64 tag = 0; - ssize_t total_returned_size = 0; - - /* - * There will always be at least notrailer_count iovecs, and - * when there's a trailer, one more than notrailer_count. Check - * count's sanity. 
- */ - if (count != notrailer_count && count != (notrailer_count + 1)) { - gossip_err("%s: count:%zu: notrailer_count :%d:\n", - __func__, - count, - notrailer_count); - return -EPROTO; - } - - - /* Copy the non-trailer iovec data into a device request buffer. */ - buffer = dev_req_alloc(); - if (!buffer) { - gossip_err("%s: dev_req_alloc failed.\n", __func__); - return -ENOMEM; - } - ptr = buffer; - for (i = 0; i < notrailer_count; i++) { - if (iov[i].iov_len > num_remaining) { - gossip_err - ("writev error: Freeing buffer and returning\n"); - dev_req_release(buffer); - return -EMSGSIZE; - } - ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len); - if (ret) { - gossip_err("Failed to copy data from user space\n"); - dev_req_release(buffer); - return -EIO; - } - num_remaining -= iov[i].iov_len; - ptr += iov[i].iov_len; - payload_size += iov[i].iov_len; - } - total_returned_size = payload_size; - - /* these elements are currently 8 byte aligned (8 bytes for (version + - * magic) 8 bytes for tag). If you add another element, either - * make it 8 bytes big, or use get_unaligned when asigning. - */ - ptr = buffer; - proto_ver = *((__s32 *) ptr); /* unused */ - ptr += sizeof(__s32); - - magic = *((__s32 *) ptr); - ptr += sizeof(__s32); - - tag = *((__u64 *) ptr); - ptr += sizeof(__u64); - - if (magic != ORANGEFS_DEVREQ_MAGIC) { - gossip_err("Error: Device magic number does not match.\n"); - dev_req_release(buffer); - return -EPROTO; - } - - op = orangefs_devreq_remove_op(tag); - if (op) { - /* Increase ref count! */ - get_op(op); - - /* calculate the size of the returned downcall. 
*/ - returned_downcall_size = - payload_size - (2 * sizeof(__s32) + sizeof(__u64)); - - /* copy the passed in downcall into the op */ - if (returned_downcall_size == - sizeof(struct orangefs_downcall_s)) { - memcpy(&op->downcall, - ptr, - sizeof(struct orangefs_downcall_s)); - } else { - gossip_err("%s: returned downcall size:%d: \n", - __func__, - returned_downcall_size); - dev_req_release(buffer); - put_op(op); - return -EMSGSIZE; - } - - /* Don't tolerate an unexpected trailer iovec. */ - if ((op->downcall.trailer_size == 0) && - (count != notrailer_count)) { - gossip_err("%s: unexpected trailer iovec.\n", - __func__); - dev_req_release(buffer); - put_op(op); - return -EPROTO; - } - - /* Don't consider the trailer if there's a bad status. */ - if (op->downcall.status != 0) - goto no_trailer; - - /* get the trailer if there is one. */ - if (op->downcall.trailer_size == 0) - goto no_trailer; - - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: op->downcall.trailer_size %lld\n", - __func__, - op->downcall.trailer_size); - - /* - * Bail if we think think there should be a trailer, but - * there's no iovec for it. - */ - if (count != (notrailer_count + 1)) { - gossip_err("%s: trailer_size:%lld: count:%zu:\n", - __func__, - op->downcall.trailer_size, - count); - dev_req_release(buffer); - put_op(op); - return -EPROTO; - } - - /* Verify that trailer_size is accurate. */ - if (op->downcall.trailer_size != iov[trailer_index].iov_len) { - gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n", - __func__, - op->downcall.trailer_size, - iov[trailer_index].iov_len); - dev_req_release(buffer); - put_op(op); - return -EMSGSIZE; - } - - total_returned_size += iov[trailer_index].iov_len; - - /* - * Allocate a buffer, copy the trailer bytes into it and - * attach it to the downcall. 
- */ - op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len); - if (op->downcall.trailer_buf != NULL) { - gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", - op->downcall.trailer_buf); - ret = copy_from_user(op->downcall.trailer_buf, - iov[trailer_index].iov_base, - iov[trailer_index].iov_len); - if (ret) { - gossip_err("%s: Failed to copy trailer.\n", - __func__); - dev_req_release(buffer); - gossip_debug(GOSSIP_DEV_DEBUG, - "vfree: %p\n", - op->downcall.trailer_buf); - vfree(op->downcall.trailer_buf); - op->downcall.trailer_buf = NULL; - put_op(op); - return -EIO; - } - } else { - gossip_err("writev: could not vmalloc for trailer!\n"); - dev_req_release(buffer); - put_op(op); - return -ENOMEM; - } - -no_trailer: - - /* if this operation is an I/O operation we need to wait - * for all data to be copied before we can return to avoid - * buffer corruption and races that can pull the buffers - * out from under us. - * - * Essentially we're synchronizing with other parts of the - * vfs implicitly by not allowing the user space - * application reading/writing this device to return until - * the buffers are done being used. 
- */ - if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) { - int timed_out = 0; - DEFINE_WAIT(wait_entry); - - /* - * tell the vfs op waiting on a waitqueue - * that this op is done - */ - spin_lock(&op->lock); - set_op_state_serviced(op); - spin_unlock(&op->lock); - - wake_up_interruptible(&op->waitq); - - while (1) { - spin_lock(&op->lock); - prepare_to_wait_exclusive( - &op->io_completion_waitq, - &wait_entry, - TASK_INTERRUPTIBLE); - if (op->io_completed) { - spin_unlock(&op->lock); - break; - } - spin_unlock(&op->lock); - - if (!signal_pending(current)) { - int timeout = - MSECS_TO_JIFFIES(1000 * - op_timeout_secs); - if (!schedule_timeout(timeout)) { - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: timed out.\n", - __func__); - timed_out = 1; - break; - } - continue; - } - - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: signal on I/O wait, aborting\n", - __func__); - break; - } - - spin_lock(&op->lock); - finish_wait(&op->io_completion_waitq, &wait_entry); - spin_unlock(&op->lock); - - /* NOTE: for I/O operations we handle releasing the op - * object except in the case of timeout. the reason we - * can't free the op in timeout cases is that the op - * service logic in the vfs retries operations using - * the same op ptr, thus it can't be freed. - */ - if (!timed_out) - op_release(op); - } else { - - /* - * tell the vfs op waiting on a waitqueue that - * this op is done - */ - spin_lock(&op->lock); - set_op_state_serviced(op); - spin_unlock(&op->lock); - /* - * for every other operation (i.e. non-I/O), we need to - * wake up the callers for downcall completion - * notification - */ - wake_up_interruptible(&op->waitq); - } - } else { - /* ignore downcalls that we're not interested in */ - gossip_debug(GOSSIP_DEV_DEBUG, - "WARNING: No one's waiting for tag %llu\n", - llu(tag)); - } - /* put_op? 
*/ - dev_req_release(buffer); - - return total_returned_size; -} - static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, struct iov_iter *iter) { - return orangefs_devreq_writev(iocb->ki_filp, - iter->iov, - iter->nr_segs, - &iocb->ki_pos); + ssize_t ret; + struct orangefs_kernel_op_s *op = NULL; + struct { + __u32 version; + __u32 magic; + __u64 tag; + } head; + int total = ret = iov_iter_count(iter); + int n; + int downcall_size = sizeof(struct orangefs_downcall_s); + int head_size = sizeof(head); + + gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n", + __func__, + total, + ret); + + if (total < MAX_DEV_REQ_DOWNSIZE) { + gossip_err("%s: total:%d: must be at least:%lu:\n", + __func__, + total, + MAX_DEV_REQ_DOWNSIZE); + ret = -EFAULT; + goto out; + } + + n = copy_from_iter(&head, head_size, iter); + if (n < head_size) { + gossip_err("%s: failed to copy head.\n", __func__); + ret = -EFAULT; + goto out; + } + + if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) { + gossip_err("%s: userspace claims version" + "%d, minimum version required: %d.\n", + __func__, + head.version, + ORANGEFS_MINIMUM_USERSPACE_VERSION); + ret = -EPROTO; + goto out; + } + + if (head.magic != ORANGEFS_DEVREQ_MAGIC) { + gossip_err("Error: Device magic number does not match.\n"); + ret = -EPROTO; + goto out; + } + + op = orangefs_devreq_remove_op(head.tag); + if (!op) { + gossip_err("WARNING: No one's waiting for tag %llu\n", + llu(head.tag)); + goto out; + } + + get_op(op); /* increase ref count. */ + + n = copy_from_iter(&op->downcall, downcall_size, iter); + if (n != downcall_size) { + gossip_err("%s: failed to copy downcall.\n", __func__); + put_op(op); + ret = -EFAULT; + goto out; + } + + if (op->downcall.status) + goto wakeup; + + /* + * We've successfully peeled off the head and the downcall. + * Something has gone awry if total doesn't equal the + * sum of head_size, downcall_size and trailer_size. 
+ */ + if ((head_size + downcall_size + op->downcall.trailer_size) != total) { + gossip_err("%s: funky write, head_size:%d" + ": downcall_size:%d: trailer_size:%lld" + ": total size:%d:\n", + __func__, + head_size, + downcall_size, + op->downcall.trailer_size, + total); + put_op(op); + ret = -EFAULT; + goto out; + } + + /* Only READDIR operations should have trailers. */ + if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) && + (op->downcall.trailer_size != 0)) { + gossip_err("%s: %x operation with trailer.", + __func__, + op->downcall.type); + put_op(op); + ret = -EFAULT; + goto out; + } + + /* READDIR operations should always have trailers. */ + if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) && + (op->downcall.trailer_size == 0)) { + gossip_err("%s: %x operation with no trailer.", + __func__, + op->downcall.type); + put_op(op); + ret = -EFAULT; + goto out; + } + + if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) + goto wakeup; + + op->downcall.trailer_buf = + vmalloc(op->downcall.trailer_size); + if (op->downcall.trailer_buf == NULL) { + gossip_err("%s: failed trailer vmalloc.\n", + __func__); + put_op(op); + ret = -ENOMEM; + goto out; + } + memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); + n = copy_from_iter(op->downcall.trailer_buf, + op->downcall.trailer_size, + iter); + if (n != op->downcall.trailer_size) { + gossip_err("%s: failed to copy trailer.\n", __func__); + vfree(op->downcall.trailer_buf); + put_op(op); + ret = -EFAULT; + goto out; + } + +wakeup: + + /* + * If this operation is an I/O operation we need to wait + * for all data to be copied before we can return to avoid + * buffer corruption and races that can pull the buffers + * out from under us. + * + * Essentially we're synchronizing with other parts of the + * vfs implicitly by not allowing the user space + * application reading/writing this device to return until + * the buffers are done being used. 
+ */ + if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) { + int timed_out = 0; + DEFINE_WAIT(wait_entry); + + /* + * tell the vfs op waiting on a waitqueue + * that this op is done + */ + spin_lock(&op->lock); + set_op_state_serviced(op); + spin_unlock(&op->lock); + + wake_up_interruptible(&op->waitq); + + while (1) { + spin_lock(&op->lock); + prepare_to_wait_exclusive( + &op->io_completion_waitq, + &wait_entry, + TASK_INTERRUPTIBLE); + if (op->io_completed) { + spin_unlock(&op->lock); + break; + } + spin_unlock(&op->lock); + + if (!signal_pending(current)) { + int timeout = + MSECS_TO_JIFFIES(1000 * + op_timeout_secs); + if (!schedule_timeout(timeout)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: timed out.\n", + __func__); + timed_out = 1; + break; + } + continue; + } + + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: signal on I/O wait, aborting\n", + __func__); + break; + } + + spin_lock(&op->lock); + finish_wait(&op->io_completion_waitq, &wait_entry); + spin_unlock(&op->lock); + + /* NOTE: for I/O operations we handle releasing the op + * object except in the case of timeout. the reason we + * can't free the op in timeout cases is that the op + * service logic in the vfs retries operations using + * the same op ptr, thus it can't be freed. + */ + if (!timed_out) + op_release(op); + } else { + /* + * tell the vfs op waiting on a waitqueue that + * this op is done + */ + spin_lock(&op->lock); + set_op_state_serviced(op); + spin_unlock(&op->lock); + /* + * for every other operation (i.e. non-I/O), we need to + * wake up the callers for downcall completion + * notification + */ + wake_up_interruptible(&op->waitq); + } +out: + return ret; } /* Returns whether any FS are still pending remounted */ From c817e266e408538290af06b95f07f6ee2b7d507a Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 13 Jan 2016 11:29:05 -0500 Subject: [PATCH 074/174] Orangefs: rename orangefs_kernel_op_s.aio_ref_count to just ref_count. 
The op structure's ref_count member hasn't got anything to do with asynchronous I/O. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-cache.c | 2 +- fs/orangefs/orangefs-kernel.h | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index b40f5d74aa97..dd4335ff8c10 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -120,7 +120,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) init_waitqueue_head(&new_op->waitq); init_waitqueue_head(&new_op->io_completion_waitq); - atomic_set(&new_op->aio_ref_count, 0); + atomic_set(&new_op->ref_count, 0); orangefs_op_initialize(new_op); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 0c7a9cf9b8ef..1c87e0bbdfe8 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -115,7 +115,7 @@ enum orangefs_vfs_op_states { #define get_op(op) \ do { \ - atomic_inc(&(op)->aio_ref_count); \ + atomic_inc(&(op)->ref_count); \ gossip_debug(GOSSIP_DEV_DEBUG, \ "(get) Alloced OP (%p:%llu)\n", \ op, \ @@ -124,7 +124,7 @@ enum orangefs_vfs_op_states { #define put_op(op) \ do { \ - if (atomic_sub_and_test(1, &(op)->aio_ref_count) == 1) { \ + if (atomic_sub_and_test(1, &(op)->ref_count) == 1) { \ gossip_debug(GOSSIP_DEV_DEBUG, \ "(put) Releasing OP (%p:%llu)\n", \ op, \ @@ -133,7 +133,7 @@ enum orangefs_vfs_op_states { } \ } while (0) -#define op_wait(op) (atomic_read(&(op)->aio_ref_count) <= 2 ? 0 : 1) +#define op_wait(op) (atomic_read(&(op)->ref_count) <= 2 ? 
0 : 1) /* * Defines for controlling whether I/O upcalls are for async or sync operations @@ -239,14 +239,13 @@ struct orangefs_kernel_op_s { int io_completed; wait_queue_head_t io_completion_waitq; + atomic_t ref_count; + /* VFS aio fields */ /* used by the async I/O code to stash the orangefs_kiocb_s structure */ void *priv; - /* used again for the async I/O code for deallocation */ - atomic_t aio_ref_count; - int attempts; struct list_head list; From 4c27b327b8c286cd3091e5d9ff1650573601140b Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 13 Jan 2016 11:34:59 -0500 Subject: [PATCH 075/174] Orangefs: change ORANGEFS_VERSION from "Unknown" to "upstream" Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-mod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index fa2fca6dca7c..cac52a9175db 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -14,7 +14,7 @@ /* ORANGEFS_VERSION is a ./configure define */ #ifndef ORANGEFS_VERSION -#define ORANGEFS_VERSION "Unknown" +#define ORANGEFS_VERSION "upstream" #endif /* From 569dbfc6b3a0e71118bc81f5f0fb56c3d1b88c54 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 13 Jan 2016 11:36:25 -0500 Subject: [PATCH 076/174] Orangefs: define a minimum compatible userspace version. Signed-off-by: Mike Marshall --- fs/orangefs/protocol.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 03bbe7505a35..56dd65abb908 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -399,6 +399,7 @@ enum { * space. Zero signifies the upstream version of the kernel module. */ #define ORANGEFS_KERNEL_PROTO_VERSION 0 +#define ORANGEFS_MINIMUM_USERSPACE_VERSION 20904 /* * describes memory regions to map in the ORANGEFS_DEV_MAP ioctl. 
From be57366e14d8341f5d2b589d5b59151895afe210 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 13 Jan 2016 11:38:14 -0500 Subject: [PATCH 077/174] Orangefs: make .statfs gossip_debug more complete. Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index bee67b37d805..a32981239ea6 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -158,11 +158,15 @@ static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf) goto out_op_release; gossip_debug(GOSSIP_SUPER_DEBUG, - "orangefs_statfs: got %ld blocks available | " - "%ld blocks total | %ld block size\n", + "%s: got %ld blocks available | " + "%ld blocks total | %ld block size | " + "%ld files total | %ld files avail\n", + __func__, (long)new_op->downcall.resp.statfs.blocks_avail, (long)new_op->downcall.resp.statfs.blocks_total, - (long)new_op->downcall.resp.statfs.block_size); + (long)new_op->downcall.resp.statfs.block_size, + (long)new_op->downcall.resp.statfs.files_total, + (long)new_op->downcall.resp.statfs.files_avail); buf->f_type = sb->s_magic; memcpy(&buf->f_fsid, &ORANGEFS_SB(sb)->fs_id, sizeof(buf->f_fsid)); From fcac9d571567e8bf952616f4a271eea5b4b407ea Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 13 Jan 2016 14:28:13 -0500 Subject: [PATCH 078/174] Orangefs: add protocol information to Documentation/filesystems/orangefs.txt Signed-off-by: Mike Marshall --- Documentation/filesystems/orangefs.txt | 218 ++++++++++++++++++++++++- 1 file changed, 217 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt index ec9c8416427e..925a53e52097 100644 --- a/Documentation/filesystems/orangefs.txt +++ b/Documentation/filesystems/orangefs.txt @@ -115,7 +115,7 @@ The following mount options are accepted: DEBUGGING ========= -If you want the debug (GOSSIP) statments in a particular +If you want the debug 
(GOSSIP) statements in a particular source file (inode.c for example) go to syslog: echo inode > /sys/kernel/debug/orangefs/kernel-debug @@ -135,3 +135,219 @@ All debugging: Get a list of all debugging keywords: cat /sys/kernel/debug/orangefs/debug-help + + +PROTOCOL BETWEEN KERNEL MODULE AND USERSPACE +============================================ + +Orangefs is a user space filesystem and an associated kernel module. +We'll just refer to the user space part of Orangefs as "userspace" +from here on out. Orangefs descends from PVFS, and userspace code +still uses PVFS for function and variable names. Userspace typedefs +many of the important structures. Function and variable names in +the kernel module have been transitioned to "orangefs", and The Linux +Coding Style avoids typedefs, so kernel module structures that +correspond to userspace structures are not typedefed. + +The kernel module implements a pseudo device that userspace +can read from and write to. Userspace can also manipulate the +kernel module through the pseudo device with ioctl. + +THE BUFMAP: + +At startup userspace allocates two page-size-aligned (posix_memalign) +mlocked memory buffers, one is used for IO and one is used for readdir +operations. The IO buffer is 41943040 bytes and the readdir buffer is +4194304 bytes. Each buffer contains logical chunks, or partitions, and +a pointer to each buffer is added to its own PVFS_dev_map_desc structure +which also describes its total size, as well as the size and number of +the partitions. + +A pointer to the IO buffer's PVFS_dev_map_desc structure is sent to a +mapping routine in the kernel module with an ioctl. 
The structure is +copied from user space to kernel space with copy_from_user and is used +to initialize the kernel module's "bufmap" (struct orangefs_bufmap), which +then contains: + + * refcnt - a reference counter + * desc_size - PVFS2_BUFMAP_DEFAULT_DESC_SIZE (4194304) - the IO buffer's + partition size, which represents the filesystem's block size and + is used for s_blocksize in super blocks. + * desc_count - PVFS2_BUFMAP_DEFAULT_DESC_COUNT (10) - the number of + partitions in the IO buffer. + * desc_shift - log2(desc_size), used for s_blocksize_bits in super blocks. + * total_size - the total size of the IO buffer. + * page_count - the number of 4096 byte pages in the IO buffer. + * page_array - a pointer to page_count * (sizeof(struct page*)) bytes + of kcalloced memory. This memory is used as an array of pointers + to each of the pages in the IO buffer through a call to get_user_pages. + * desc_array - a pointer to desc_count * (sizeof(struct orangefs_bufmap_desc)) + bytes of kcalloced memory. This memory is further initialized: + + user_desc is the kernel's copy of the IO buffer's ORANGEFS_dev_map_desc + structure. user_desc->ptr points to the IO buffer. + + pages_per_desc = bufmap->desc_size / PAGE_SIZE + offset = 0 + + bufmap->desc_array[0].page_array = &bufmap->page_array[offset] + bufmap->desc_array[0].array_count = pages_per_desc = 1024 + bufmap->desc_array[0].uaddr = (user_desc->ptr) + (0 * 1024 * 4096) + offset += 1024 + . + . + . + bufmap->desc_array[9].page_array = &bufmap->page_array[offset] + bufmap->desc_array[9].array_count = pages_per_desc = 1024 + bufmap->desc_array[9].uaddr = (user_desc->ptr) + + (9 * 1024 * 4096) + offset += 1024 + + * buffer_index_array - a desc_count sized array of ints, used to + indicate which of the IO buffer's partitions are available to use. + * buffer_index_lock - a spinlock to protect buffer_index_array during update.
+ * readdir_index_array - a five (ORANGEFS_READDIR_DEFAULT_DESC_COUNT) element + int array used to indicate which of the readdir buffer's partitions are + available to use. + * readdir_index_lock - a spinlock to protect readdir_index_array during + update. + +OPERATIONS: + +The kernel module builds an "op" (struct orangefs_kernel_op_s) when it +needs to communicate with userspace. Part of the op contains the "upcall" +which expresses the request to userspace. Part of the op eventually +contains the "downcall" which expresses the results of the request. + +The slab allocator is used to keep a cache of op structures handy. + +The life cycle of a typical op goes like this: + + - obtain and initialize an op structure from the op_cache. + + - queue the op to the pvfs device so that its upcall data can be + read by userspace. + + - wait for userspace to write downcall data back to the pvfs device. + + - consume the downcall and return the op struct to the op_cache. + +Some ops are atypical with respect to their payloads: readdir and io ops. + + - readdir ops use the smaller of the two pre-allocated pre-partitioned + memory buffers. The readdir buffer is only available to userspace. + The kernel module obtains an index to a free partition before launching + a readdir op. Userspace deposits the results into the indexed partition + and then writes them back to the pvfs device. + + - io (read and write) ops use the larger of the two pre-allocated + pre-partitioned memory buffers. The IO buffer is accessible from + both userspace and the kernel module. The kernel module obtains an + index to a free partition before launching an io op. The kernel module + deposits write data into the indexed partition, to be consumed + directly by userspace. Userspace deposits the results of read + requests into the indexed partition, to be consumed directly + by the kernel module. + +Responses to kernel requests are all packaged in pvfs2_downcall_t +structs.
Besides a few other members, pvfs2_downcall_t contains a +union of structs, each of which is associated with a particular +response type. + +The several members outside of the union are: + - int32_t type - type of operation. + - int32_t status - return code for the operation. + - int64_t trailer_size - 0 unless readdir operation. + - char *trailer_buf - initialized to NULL, used during readdir operations. + +The appropriate member inside the union is filled out for any +particular response. + + PVFS2_VFS_OP_FILE_IO + fill a pvfs2_io_response_t + + PVFS2_VFS_OP_LOOKUP + fill a PVFS_object_kref + + PVFS2_VFS_OP_CREATE + fill a PVFS_object_kref + + PVFS2_VFS_OP_SYMLINK + fill a PVFS_object_kref + + PVFS2_VFS_OP_GETATTR + fill in a PVFS_sys_attr_s (tons of stuff the kernel doesn't need) + fill in a string with the link target when the object is a symlink. + + PVFS2_VFS_OP_MKDIR + fill a PVFS_object_kref + + PVFS2_VFS_OP_STATFS + fill a pvfs2_statfs_response_t with useless info. It is hard for + us to know, in a timely fashion, these statistics about our + distributed network filesystem. + + PVFS2_VFS_OP_FS_MOUNT + fill a pvfs2_fs_mount_response_t which is just like a PVFS_object_kref + except its members are in a different order and "__pad1" is replaced + with "id". + + PVFS2_VFS_OP_GETXATTR + fill a pvfs2_getxattr_response_t + + PVFS2_VFS_OP_LISTXATTR + fill a pvfs2_listxattr_response_t + + PVFS2_VFS_OP_PARAM + fill a pvfs2_param_response_t + + PVFS2_VFS_OP_PERF_COUNT + fill a pvfs2_perf_count_response_t + + PVFS2_VFS_OP_FSKEY + fill a pvfs2_fs_key_response_t + + PVFS2_VFS_OP_READDIR + jam everything needed to represent a pvfs2_readdir_response_t into + the readdir buffer descriptor specified in the upcall. + +writev() on /dev/pvfs2-req is used to pass responses to the requests +made by the kernel side. + +A buffer_list containing: + - a pointer to the prepared response to the request from the + kernel (struct pvfs2_downcall_t).
+ - and also, in the case of a readdir request, a pointer to a + buffer containing descriptors for the objects in the target + directory. +... is sent to the function (PINT_dev_write_list) which performs +the writev. + +PINT_dev_write_list has a local iovec array: struct iovec io_array[10]; + +The first four elements of io_array are initialized like this for all +responses: + + io_array[0].iov_base = address of local variable "proto_ver" (int32_t) + io_array[0].iov_len = sizeof(int32_t) + + io_array[1].iov_base = address of global variable "pdev_magic" (int32_t) + io_array[1].iov_len = sizeof(int32_t) + + io_array[2].iov_base = address of parameter "tag" (PVFS_id_gen_t) + io_array[2].iov_len = sizeof(int64_t) + + io_array[3].iov_base = address of out_downcall member (pvfs2_downcall_t) + of global variable vfs_request (vfs_request_t) + io_array[3].iov_len = sizeof(pvfs2_downcall_t) + +Readdir responses initialize the fifth element of io_array like this: + + io_array[4].iov_base = contents of member trailer_buf (char *) + from out_downcall member of global variable + vfs_request + io_array[4].iov_len = contents of member trailer_size (PVFS_size) + from out_downcall member of global variable + vfs_request + + From 1808f8cc6cb2842c53147eccfd5e88044d0d22a6 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 15 Jan 2016 13:10:52 -0500 Subject: [PATCH 079/174] Orangefs: add verification to decode_dirents Also add comments to decode_dirents and make it more readable. Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 118 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 25 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 58558e37fb8a..6f5836d6a7a3 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -15,7 +15,14 @@ struct readdir_handle_s { }; /* - * decode routine needed by kmod to make sense of the shared page for readdirs. 
+ * decode routine used by kmod to deal with the blob sent from + * userspace for readdirs. The blob contains zero or more of these + * sub-blobs: + * __u32 - represents length of the character string that follows. + * string - between 1 and ORANGEFS_NAME_MAX bytes long. + * padding - (if needed) to cause the __u32 plus the string to be + * eight byte aligned. + * khandle - sizeof(khandle) bytes. */ static long decode_dirents(char *ptr, size_t size, struct orangefs_readdir_response_s *readdir) @@ -24,54 +31,115 @@ static long decode_dirents(char *ptr, size_t size, struct orangefs_readdir_response_s *rd = (struct orangefs_readdir_response_s *) ptr; char *buf = ptr; + int khandle_size = sizeof(struct orangefs_khandle); + size_t offset = offsetof(struct orangefs_readdir_response_s, + dirent_array); + /* 8 reflects eight byte alignment */ + int smallest_blob = khandle_size + 8; + __u32 len; + int aligned_len; + int sizeof_u32 = sizeof(__u32); + long ret; - if (size < offsetof(struct orangefs_readdir_response_s, dirent_array)) - return -EINVAL; + gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size); + + /* size is = offset on empty dirs, > offset on non-empty dirs... 
*/ + if (size < offset) { + gossip_err("%s: size:%zu: offset:%zu:\n", + __func__, + size, + offset); + ret = -EINVAL; + goto out; + } + + if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) { + gossip_err("%s: size:%zu: dirent_outcount:%d:\n", + __func__, + size, + readdir->orangefs_dirent_outcount); + ret = -EINVAL; + goto out; + } readdir->token = rd->token; readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount; readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount, sizeof(*readdir->dirent_array), GFP_KERNEL); - if (readdir->dirent_array == NULL) - return -ENOMEM; + if (readdir->dirent_array == NULL) { + gossip_err("%s: kcalloc failed.\n", __func__); + ret = -ENOMEM; + goto out; + } - buf += offsetof(struct orangefs_readdir_response_s, dirent_array); - size -= offsetof(struct orangefs_readdir_response_s, dirent_array); + buf += offset; + size -= offset; for (i = 0; i < readdir->orangefs_dirent_outcount; i++) { - __u32 len; - - if (size < 4) - goto Einval; + if (size < smallest_blob) { + gossip_err("%s: size:%zu: smallest_blob:%d:\n", + __func__, + size, + smallest_blob); + ret = -EINVAL; + goto free; + } len = *(__u32 *)buf; - if (len >= (unsigned)-24) - goto Einval; + if ((len < 1) || (len > ORANGEFS_NAME_MAX)) { + gossip_err("%s: len:%d:\n", __func__, len); + ret = -EINVAL; + goto free; + } - readdir->dirent_array[i].d_name = buf + 4; + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: size:%zu: len:%d:\n", + __func__, + size, + len); + + readdir->dirent_array[i].d_name = buf + sizeof_u32; readdir->dirent_array[i].d_length = len; /* - * Round 4 + len + 1, which is the encoded size plus the string - * plus the null terminator to the nearest eight byte boundry. + * Calculate "aligned" length of this string and its + * associated __u32 descriptor. 
*/ - len = ((4 + len + 1) + 7) & ~7; - if (size < len + 16) - goto Einval; - size -= len + 16; + aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7; + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: aligned_len:%d:\n", + __func__, + aligned_len); - buf += len; + /* + * The end of the blob should coincide with the end + * of the last sub-blob. + */ + if (size < aligned_len + khandle_size) { + gossip_err("%s: ran off the end of the blob.\n", + __func__); + ret = -EINVAL; + goto free; + } + size -= aligned_len + khandle_size; + + buf += aligned_len; readdir->dirent_array[i].khandle = *(struct orangefs_khandle *) buf; - buf += 16; + buf += khandle_size; } - return buf - ptr; -Einval: + ret = buf - ptr; + gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret); + goto out; + +free: kfree(readdir->dirent_array); readdir->dirent_array = NULL; - return -EINVAL; + +out: + return ret; } static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, From cf0c27715bd640628d39421f3d232c87d7e08954 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 19 Jan 2016 12:04:40 -0500 Subject: [PATCH 080/174] Orangefs: make gossip statement more palatable to xtensa Thanks to Intel's kbuild test robot Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 4 ++-- fs/orangefs/orangefs-kernel.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 0f01d3edfc2b..5da5ef616b85 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -275,10 +275,10 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, ret); if (total < MAX_DEV_REQ_DOWNSIZE) { - gossip_err("%s: total:%d: must be at least:%lu:\n", + gossip_err("%s: total:%d: must be at least:%u:\n", __func__, total, - MAX_DEV_REQ_DOWNSIZE); + (unsigned int) MAX_DEV_REQ_DOWNSIZE); ret = -EFAULT; goto out; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 
1c87e0bbdfe8..6dcc38a5f117 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -75,9 +75,9 @@ #define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080 #define ORANGEFS_MAX_FSKEY_LEN 64 -#define MAX_DEV_REQ_UPSIZE (2*sizeof(__s32) + \ +#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) + \ sizeof(__u64) + sizeof(struct orangefs_upcall_s)) -#define MAX_DEV_REQ_DOWNSIZE (2*sizeof(__s32) + \ +#define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \ sizeof(__u64) + sizeof(struct orangefs_downcall_s)) /* borrowed from irda.h */ From 3e1dd9aa8228cdbbf604006b179efc9312001fb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jan 2016 11:33:40 -0500 Subject: [PATCH 081/174] orangefs: use DEFINE_MUTEX (and mutex_init() had been too late) Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-mod.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index cac52a9175db..7434fa036328 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -70,7 +70,7 @@ module_param(op_timeout_secs, int, 0); module_param(slot_timeout_secs, int, 0); /* synchronizes the request device file */ -struct mutex devreq_mutex; +DEFINE_MUTEX(devreq_mutex); /* * Blocks non-priority requests from being queued for servicing. 
This @@ -78,7 +78,7 @@ struct mutex devreq_mutex; * for now it's only being used to stall the op addition to the request * list */ -struct mutex request_mutex; +DEFINE_MUTEX(request_mutex); /* hash table for storing operations waiting for matching downcall */ struct list_head *htable_ops_in_progress; @@ -160,9 +160,6 @@ static int __init orangefs_init(void) goto cleanup_kiocb; } - mutex_init(&devreq_mutex); - mutex_init(&request_mutex); - htable_ops_in_progress = kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL); if (!htable_ops_in_progress) { From fb6d2526e92e56d3f41bfec45daf1ce09dd59e7b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jan 2016 12:00:26 -0500 Subject: [PATCH 082/174] orangefs: generic_file_open() is pointless for character devices Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 5da5ef616b85..fb7f092f94ba 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -85,9 +85,8 @@ static int orangefs_devreq_open(struct inode *inode, struct file *file) mutex_lock(&devreq_mutex); if (open_access_count == 0) { - ret = generic_file_open(inode, file); - if (ret == 0) - open_access_count++; + open_access_count++; + ret = 0; } else { DUMP_DEVICE_ERROR(); } From 83595db05214eb49477b2ffb7d18ce4e7468c776 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jan 2016 12:03:05 -0500 Subject: [PATCH 083/174] orangefs: ->poll() is only called between successful ->open() and ->release() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index fb7f092f94ba..4cecc7c4f760 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -966,14 +966,12 @@ static 
unsigned int orangefs_devreq_poll(struct file *file, { int poll_revent_mask = 0; - if (open_access_count == 1) { - poll_wait(file, &orangefs_request_list_waitq, poll_table); + poll_wait(file, &orangefs_request_list_waitq, poll_table); - spin_lock(&orangefs_request_list_lock); - if (!list_empty(&orangefs_request_list)) - poll_revent_mask |= POLL_IN; - spin_unlock(&orangefs_request_list_lock); - } + spin_lock(&orangefs_request_list_lock); + if (!list_empty(&orangefs_request_list)) + poll_revent_mask |= POLL_IN; + spin_unlock(&orangefs_request_list_lock); return poll_revent_mask; } From 8016387ce78b4c5147241b798cf6d1fa400e4944 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jan 2016 12:05:47 -0500 Subject: [PATCH 084/174] orangefs: kill ioctl32 rudiments Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 4cecc7c4f760..456b5189f772 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -898,23 +898,6 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, #endif /* CONFIG_COMPAT is in .config */ -/* - * The following two ioctl32 functions had been refactored into the above - * CONFIG_COMPAT ifdef, but that was an over simplification that was - * not noticed until we tried to compile on power pc... 
- */ -#if (defined(CONFIG_COMPAT) && !defined(HAVE_REGISTER_IOCTL32_CONVERSION)) || !defined(CONFIG_COMPAT) -static int orangefs_ioctl32_init(void) -{ - return 0; -} - -static void orangefs_ioctl32_cleanup(void) -{ - return; -} -#endif - /* the assigned character device major number */ static int orangefs_dev_major; @@ -924,13 +907,6 @@ static int orangefs_dev_major; */ int orangefs_dev_init(void) { - int ret; - - /* register the ioctl32 sub-system */ - ret = orangefs_ioctl32_init(); - if (ret < 0) - return ret; - /* register orangefs-req device */ orangefs_dev_major = register_chrdev(0, ORANGEFS_REQDEVICE_NAME, @@ -939,7 +915,6 @@ int orangefs_dev_init(void) gossip_debug(GOSSIP_DEV_DEBUG, "Failed to register /dev/%s (error %d)\n", ORANGEFS_REQDEVICE_NAME, orangefs_dev_major); - orangefs_ioctl32_cleanup(); return orangefs_dev_major; } @@ -957,8 +932,6 @@ void orangefs_dev_cleanup(void) gossip_debug(GOSSIP_DEV_DEBUG, "*** /dev/%s character device unregistered ***\n", ORANGEFS_REQDEVICE_NAME); - /* unregister the ioctl32 sub-system */ - orangefs_ioctl32_cleanup(); } static unsigned int orangefs_devreq_poll(struct file *file, From 90e54e36c95536a476db0fe01daa556d647aca2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jan 2016 12:07:49 -0500 Subject: [PATCH 085/174] orangefs: ->poll() doesn't need spinlock not just for list_empty()... 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 456b5189f772..b58fab2a9c26 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -941,10 +941,8 @@ static unsigned int orangefs_devreq_poll(struct file *file, poll_wait(file, &orangefs_request_list_waitq, poll_table); - spin_lock(&orangefs_request_list_lock); if (!list_empty(&orangefs_request_list)) poll_revent_mask |= POLL_IN; - spin_unlock(&orangefs_request_list_lock); return poll_revent_mask; } From fc916da52dde736605137c7d528e2cdec7f81bca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Jan 2016 12:26:13 -0500 Subject: [PATCH 086/174] orangefs: get rid of macros Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 41 -------------------------------- fs/orangefs/waitqueue.c | 44 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 6dcc38a5f117..d9b5b512bd83 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -619,47 +619,6 @@ extern wait_queue_head_t orangefs_bufmap_init_waitq; /* * misc convenience macros */ -#define add_op_to_request_list(op) \ -do { \ - spin_lock(&orangefs_request_list_lock); \ - spin_lock(&op->lock); \ - set_op_state_waiting(op); \ - list_add_tail(&op->list, &orangefs_request_list); \ - spin_unlock(&orangefs_request_list_lock); \ - spin_unlock(&op->lock); \ - wake_up_interruptible(&orangefs_request_list_waitq); \ -} while (0) - -#define add_priority_op_to_request_list(op) \ - do { \ - spin_lock(&orangefs_request_list_lock); \ - spin_lock(&op->lock); \ - set_op_state_waiting(op); \ - \ - list_add(&op->list, &orangefs_request_list); \ - spin_unlock(&orangefs_request_list_lock); \ - spin_unlock(&op->lock); \ - 
wake_up_interruptible(&orangefs_request_list_waitq); \ -} while (0) - -#define remove_op_from_request_list(op) \ - do { \ - struct list_head *tmp = NULL; \ - struct list_head *tmp_safe = NULL; \ - struct orangefs_kernel_op_s *tmp_op = NULL; \ - \ - spin_lock(&orangefs_request_list_lock); \ - list_for_each_safe(tmp, tmp_safe, &orangefs_request_list) { \ - tmp_op = list_entry(tmp, \ - struct orangefs_kernel_op_s, \ - list); \ - if (tmp_op && (tmp_op == op)) { \ - list_del(&tmp_op->list); \ - break; \ - } \ - } \ - spin_unlock(&orangefs_request_list_lock); \ - } while (0) #define ORANGEFS_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ #define ORANGEFS_OP_PRIORITY 2 /* service_operation() is high priority */ diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 751c3c640a52..4730baf686b2 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -41,6 +41,31 @@ void purge_waiting_ops(void) spin_unlock(&orangefs_request_list_lock); } +static inline void +add_op_to_request_list(struct orangefs_kernel_op_s *op) +{ + spin_lock(&orangefs_request_list_lock); + spin_lock(&op->lock); + set_op_state_waiting(op); + list_add_tail(&op->list, &orangefs_request_list); + spin_unlock(&orangefs_request_list_lock); + spin_unlock(&op->lock); + wake_up_interruptible(&orangefs_request_list_waitq); +} + +static inline +void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op) +{ + spin_lock(&orangefs_request_list_lock); + spin_lock(&op->lock); + set_op_state_waiting(op); + + list_add(&op->list, &orangefs_request_list); + spin_unlock(&orangefs_request_list_lock); + spin_unlock(&op->lock); + wake_up_interruptible(&orangefs_request_list_waitq); +} + /* * submits a ORANGEFS operation and waits for it to complete * @@ -252,6 +277,25 @@ retry_servicing: return ret; } +static inline void remove_op_from_request_list(struct orangefs_kernel_op_s *op) +{ + struct list_head *tmp = NULL; + struct list_head *tmp_safe = NULL; + struct 
orangefs_kernel_op_s *tmp_op = NULL; + + spin_lock(&orangefs_request_list_lock); + list_for_each_safe(tmp, tmp_safe, &orangefs_request_list) { + tmp_op = list_entry(tmp, + struct orangefs_kernel_op_s, + list); + if (tmp_op && (tmp_op == op)) { + list_del(&tmp_op->list); + break; + } + } + spin_unlock(&orangefs_request_list_lock); +} + void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* From 1264ddfdb7afda6f2c994ac30cad925fec346bae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 21 Jan 2016 21:55:47 -0500 Subject: [PATCH 087/174] orangefs: kill orangefs_inode_s ->list no users... Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index d9b5b512bd83..ab2b9b061996 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -270,9 +270,6 @@ struct orangefs_inode_s { * with this object */ unsigned long pinode_flags; - - /* All allocated orangefs_inode_s objects are chained to a list */ - struct list_head list; }; #define P_ATIME_FLAG 0 From e07db0a2c2e910d6619bfff962d73bd9c886c604 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 21 Jan 2016 22:21:41 -0500 Subject: [PATCH 088/174] make orangefs_clean_up_interrupted_operation() static Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 1 - fs/orangefs/waitqueue.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index ab2b9b061996..d78f3852bc4d 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -476,7 +476,6 @@ void purge_inprogress_ops(void); */ int wait_for_matching_downcall(struct orangefs_kernel_op_s *op); int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op); -void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op); void 
purge_waiting_ops(void); /* diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 4730baf686b2..bc86f16c2037 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -296,7 +296,7 @@ static inline void remove_op_from_request_list(struct orangefs_kernel_op_s *op) spin_unlock(&orangefs_request_list_lock); } -void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) +static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* * handle interrupted cases depending on what state we were in when From b7ae37b09e069a5d8d604caabd6675456a0d89fc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 21 Jan 2016 22:58:58 -0500 Subject: [PATCH 089/174] orangefs: make wait_for_...downcall() static Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 2 -- fs/orangefs/waitqueue.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index d78f3852bc4d..825545a7d167 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -474,8 +474,6 @@ void purge_inprogress_ops(void); /* * defined in waitqueue.c */ -int wait_for_matching_downcall(struct orangefs_kernel_op_s *op); -int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op); void purge_waiting_ops(void); /* diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index bc86f16c2037..0b04f4197526 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -382,7 +382,7 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * operation since client-core seems to be exiting too often * or if we were interrupted. 
*/ -int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) +static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) { int ret = -EINVAL; DEFINE_WAIT(wait_entry); @@ -488,7 +488,7 @@ int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) * cancellation upcall anyway. the only way to exit this is to either * timeout or have the cancellation be serviced properly. */ -int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) +static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) { int ret = -EINVAL; DEFINE_WAIT(wait_entry); From 831d0949799be75ed84c1c6a4541ebcd74edba6c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 21 Jan 2016 23:17:37 -0500 Subject: [PATCH 090/174] orangefs: move wakeups into set_op_state_{serviced,purged}() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 13 ++++--------- fs/orangefs/orangefs-kernel.h | 12 ++++++++++-- fs/orangefs/orangefs-mod.c | 1 - fs/orangefs/waitqueue.c | 1 - 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b58fab2a9c26..dadeb381f9fc 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -415,8 +415,6 @@ wakeup: set_op_state_serviced(op); spin_unlock(&op->lock); - wake_up_interruptible(&op->waitq); - while (1) { spin_lock(&op->lock); prepare_to_wait_exclusive( @@ -464,17 +462,14 @@ wakeup: } else { /* * tell the vfs op waiting on a waitqueue that - * this op is done - */ - spin_lock(&op->lock); - set_op_state_serviced(op); - spin_unlock(&op->lock); - /* + * this op is done - * for every other operation (i.e. 
non-I/O), we need to * wake up the callers for downcall completion * notification */ - wake_up_interruptible(&op->waitq); + spin_lock(&op->lock); + set_op_state_serviced(op); + spin_unlock(&op->lock); } out: return ret; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 825545a7d167..160c4c6a4d17 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -105,8 +105,16 @@ enum orangefs_vfs_op_states { #define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) #define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) -#define set_op_state_serviced(op) ((op)->op_state = OP_VFS_STATE_SERVICED) -#define set_op_state_purged(op) ((op)->op_state |= OP_VFS_STATE_PURGED) +static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) +{ + op->op_state = OP_VFS_STATE_SERVICED; + wake_up_interruptible(&op->waitq); +} +static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) +{ + op->op_state |= OP_VFS_STATE_PURGED; + wake_up_interruptible(&op->waitq); +} #define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) #define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 7434fa036328..d0257f8b8cd3 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -303,7 +303,6 @@ void purge_inprogress_ops(void) get_opname_string(op)); set_op_state_purged(op); spin_unlock(&op->lock); - wake_up_interruptible(&op->waitq); } } } diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 0b04f4197526..641de05fa739 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -36,7 +36,6 @@ void purge_waiting_ops(void) spin_lock(&op->lock); set_op_state_purged(op); spin_unlock(&op->lock); - wake_up_interruptible(&op->waitq); } spin_unlock(&orangefs_request_list_lock); } From ade3d78104e08809569acef37dc905066d320726 Mon Sep 17 00:00:00 2001 From: Al 
Viro Date: Thu, 21 Jan 2016 22:58:58 -0500 Subject: [PATCH 091/174] orangefs: make wait_for_...downcall() static Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 641de05fa739..a257891dd3ea 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -16,6 +16,9 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" +static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *); +static int wait_for_matching_downcall(struct orangefs_kernel_op_s *); + /* * What we do in this function is to walk the list of operations that are * present in the request queue and mark them as purged. From 60831949cca782d54bd1f370fbadf17b772d6741 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 21 Jan 2016 23:17:37 -0500 Subject: [PATCH 092/174] orangefs: move wakeups into set_op_state_{serviced,purged}() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 160c4c6a4d17..4219b2f9a5ae 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -103,24 +103,6 @@ enum orangefs_vfs_op_states { OP_VFS_STATE_PURGED = 8, }; -#define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) -#define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) -static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) -{ - op->op_state = OP_VFS_STATE_SERVICED; - wake_up_interruptible(&op->waitq); -} -static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) -{ - op->op_state |= OP_VFS_STATE_PURGED; - wake_up_interruptible(&op->waitq); -} - -#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) -#define op_state_in_progress(op) ((op)->op_state & 
OP_VFS_STATE_INPROGR) -#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) -#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) - #define get_op(op) \ do { \ atomic_inc(&(op)->ref_count); \ @@ -259,6 +241,24 @@ struct orangefs_kernel_op_s { struct list_head list; }; +#define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) +#define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) +static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) +{ + op->op_state = OP_VFS_STATE_SERVICED; + wake_up_interruptible(&op->waitq); +} +static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) +{ + op->op_state |= OP_VFS_STATE_PURGED; + wake_up_interruptible(&op->waitq); +} + +#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) +#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) +#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) +#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) + /* per inode private orangefs info */ struct orangefs_inode_s { struct orangefs_object_kref refn; From 96acf9d65e70e0eb2716e3e46c45f4acb8256f1a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jan 2016 13:34:32 -0500 Subject: [PATCH 093/174] orangefs: nothing should remain in request list and in hash ... otherwise some thread is running in .text that is about to be freed. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-mod.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index d0257f8b8cd3..bd9fbfe2ccee 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -236,8 +236,6 @@ out: static void __exit orangefs_exit(void) { int i = 0; - struct orangefs_kernel_op_s *cur_op = NULL; - gossip_debug(GOSSIP_INIT_DEBUG, "orangefs: orangefs_exit called\n"); unregister_filesystem(&orangefs_fs_type); @@ -245,27 +243,9 @@ static void __exit orangefs_exit(void) orangefs_sysfs_exit(); fsid_key_table_finalize(); orangefs_dev_cleanup(); - /* clear out all pending upcall op requests */ - spin_lock(&orangefs_request_list_lock); - while (!list_empty(&orangefs_request_list)) { - cur_op = list_entry(orangefs_request_list.next, - struct orangefs_kernel_op_s, - list); - list_del(&cur_op->list); - gossip_debug(GOSSIP_INIT_DEBUG, - "Freeing unhandled upcall request type %d\n", - cur_op->upcall.type); - op_release(cur_op); - } - spin_unlock(&orangefs_request_list_lock); - + BUG_ON(!list_empty(&orangefs_request_list)); for (i = 0; i < hash_table_size; i++) - while (!list_empty(&htable_ops_in_progress[i])) { - cur_op = list_entry(htable_ops_in_progress[i].next, - struct orangefs_kernel_op_s, - list); - op_release(cur_op); - } + BUG_ON(!list_empty(&htable_ops_in_progress[i])); kiocb_cache_finalize(); orangefs_inode_cache_finalize(); From fee25ce12504ff071254fd213055c3f1d3004622 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jan 2016 19:46:08 -0500 Subject: [PATCH 094/174] orangefs: make sure that reopening pvfs2-req won't overlap with the end of close Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index dadeb381f9fc..92573d9cc17c 100644 --- 
a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -85,7 +85,7 @@ static int orangefs_devreq_open(struct inode *inode, struct file *file) mutex_lock(&devreq_mutex); if (open_access_count == 0) { - open_access_count++; + open_access_count = 1; ret = 0; } else { DUMP_DEVICE_ERROR(); @@ -533,12 +533,11 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) if (orangefs_get_bufmap_init()) orangefs_bufmap_finalize(); - open_access_count--; + open_access_count = -1; unmounted = mark_all_pending_mounts(); gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n", (unmounted ? "UNMOUNTED" : "MOUNTED")); - mutex_unlock(&devreq_mutex); /* * Walk through the list of ops in the request list, mark them @@ -552,6 +551,8 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) purge_inprogress_ops(); gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: device close complete\n"); + open_access_count = 0; + mutex_unlock(&devreq_mutex); return 0; } From ed42fe059389daa35a2aa10ec832e9f8d0a9e59e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jan 2016 19:47:47 -0500 Subject: [PATCH 095/174] orangefs: hopefully saner op refcounting and locking * create with refcount 1 * make op_release() decrement and free if zero (i.e. old put_op() has become that). * mark when submitter has given up waiting; from that point nobody else can move between the lists, change state, etc. 
* have daemon read/write_iter grab a reference when picking op and *always* give it up in the end * don't put into hash until we know it's been successfully passed to daemon * move op->lock _lower_ than htab_in_progress_lock (and make sure to take it in purge_inprogress_ops()) Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 112 ++++++++++++++++++---------------- fs/orangefs/file.c | 17 ++---- fs/orangefs/orangefs-cache.c | 5 +- fs/orangefs/orangefs-kernel.h | 46 +++++++------- fs/orangefs/orangefs-mod.c | 2 + fs/orangefs/orangefs-sysfs.c | 28 +++------ fs/orangefs/orangefs-utils.c | 16 ++--- fs/orangefs/waitqueue.c | 24 ++------ 8 files changed, 107 insertions(+), 143 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 92573d9cc17c..b7a6aa44ce3e 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -43,9 +43,7 @@ static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op) { int index = hash_func(op->tag, hash_table_size); - spin_lock(&htable_ops_in_progress_lock); list_add_tail(&op->list, &htable_ops_in_progress[index]); - spin_unlock(&htable_ops_in_progress_lock); } static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) @@ -60,8 +58,9 @@ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) next, &htable_ops_in_progress[index], list) { - if (op->tag == tag) { - list_del(&op->list); + if (op->tag == tag && !op_state_purged(op)) { + list_del_init(&op->list); + get_op(op); /* increase ref count. */ spin_unlock(&htable_ops_in_progress_lock); return op; } @@ -127,12 +126,17 @@ static ssize_t orangefs_devreq_read(struct file *file, return -EINVAL; } +restart: /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { __s32 fsid; /* This lock is held past the end of the loop when we break. 
*/ spin_lock(&op->lock); + if (unlikely(op_state_purged(op))) { + spin_unlock(&op->lock); + continue; + } fsid = fsid_of_op(op); if (fsid != ORANGEFS_FS_ID_NULL) { @@ -197,16 +201,10 @@ static ssize_t orangefs_devreq_read(struct file *file, spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } - - /* - * Set the operation to be in progress and move it between lists since - * it has been sent to the client. - */ - set_op_state_inprogress(cur_op); - - list_del(&cur_op->list); + list_del_init(&cur_op->list); + get_op(op); spin_unlock(&orangefs_request_list_lock); - orangefs_devreq_add_op(cur_op); + spin_unlock(&cur_op->lock); /* Push the upcall out. */ @@ -224,6 +222,25 @@ static ssize_t orangefs_devreq_read(struct file *file, if (ret != 0) goto error; + spin_lock(&htable_ops_in_progress_lock); + spin_lock(&cur_op->lock); + if (unlikely(op_state_given_up(cur_op))) { + spin_unlock(&cur_op->lock); + spin_unlock(&htable_ops_in_progress_lock); + op_release(cur_op); + goto restart; + } + + /* + * Set the operation to be in progress and move it between lists since + * it has been sent to the client. + */ + set_op_state_inprogress(cur_op); + orangefs_devreq_add_op(cur_op); + spin_unlock(&cur_op->lock); + spin_unlock(&htable_ops_in_progress_lock); + op_release(cur_op); + /* The client only asks to read one size buffer. 
*/ return MAX_DEV_REQ_UPSIZE; error: @@ -235,11 +252,13 @@ error: gossip_err("orangefs: Failed to copy data to user space\n"); spin_lock(&orangefs_request_list_lock); spin_lock(&cur_op->lock); - set_op_state_waiting(cur_op); - orangefs_devreq_remove_op(cur_op->tag); - list_add(&cur_op->list, &orangefs_request_list); + if (likely(!op_state_given_up(cur_op))) { + set_op_state_waiting(cur_op); + list_add(&cur_op->list, &orangefs_request_list); + } spin_unlock(&cur_op->lock); spin_unlock(&orangefs_request_list_lock); + op_release(cur_op); return -EFAULT; } @@ -278,15 +297,13 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, __func__, total, (unsigned int) MAX_DEV_REQ_DOWNSIZE); - ret = -EFAULT; - goto out; + return -EFAULT; } n = copy_from_iter(&head, head_size, iter); if (n < head_size) { gossip_err("%s: failed to copy head.\n", __func__); - ret = -EFAULT; - goto out; + return -EFAULT; } if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) { @@ -295,31 +312,26 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, __func__, head.version, ORANGEFS_MINIMUM_USERSPACE_VERSION); - ret = -EPROTO; - goto out; + return -EPROTO; } if (head.magic != ORANGEFS_DEVREQ_MAGIC) { gossip_err("Error: Device magic number does not match.\n"); - ret = -EPROTO; - goto out; + return -EPROTO; } op = orangefs_devreq_remove_op(head.tag); if (!op) { gossip_err("WARNING: No one's waiting for tag %llu\n", llu(head.tag)); - goto out; + return ret; } - get_op(op); /* increase ref count. */ - n = copy_from_iter(&op->downcall, downcall_size, iter); if (n != downcall_size) { gossip_err("%s: failed to copy downcall.\n", __func__); - put_op(op); ret = -EFAULT; - goto out; + goto Broken; } if (op->downcall.status) @@ -339,9 +351,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, downcall_size, op->downcall.trailer_size, total); - put_op(op); ret = -EFAULT; - goto out; + goto Broken; } /* Only READDIR operations should have trailers. 
*/ @@ -350,9 +361,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, gossip_err("%s: %x operation with trailer.", __func__, op->downcall.type); - put_op(op); ret = -EFAULT; - goto out; + goto Broken; } /* READDIR operations should always have trailers. */ @@ -361,9 +371,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, gossip_err("%s: %x operation with no trailer.", __func__, op->downcall.type); - put_op(op); ret = -EFAULT; - goto out; + goto Broken; } if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) @@ -374,9 +383,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (op->downcall.trailer_buf == NULL) { gossip_err("%s: failed trailer vmalloc.\n", __func__); - put_op(op); ret = -ENOMEM; - goto out; + goto Broken; } memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); n = copy_from_iter(op->downcall.trailer_buf, @@ -385,9 +393,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (n != op->downcall.trailer_size) { gossip_err("%s: failed to copy trailer.\n", __func__); vfree(op->downcall.trailer_buf); - put_op(op); ret = -EFAULT; - goto out; + goto Broken; } wakeup: @@ -404,7 +411,6 @@ wakeup: * the buffers are done being used. */ if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) { - int timed_out = 0; DEFINE_WAIT(wait_entry); /* @@ -412,7 +418,8 @@ wakeup: * that this op is done */ spin_lock(&op->lock); - set_op_state_serviced(op); + if (!op_state_given_up(op)) + set_op_state_serviced(op); spin_unlock(&op->lock); while (1) { @@ -435,7 +442,6 @@ wakeup: gossip_debug(GOSSIP_DEV_DEBUG, "%s: timed out.\n", __func__); - timed_out = 1; break; } continue; @@ -450,15 +456,6 @@ wakeup: spin_lock(&op->lock); finish_wait(&op->io_completion_waitq, &wait_entry); spin_unlock(&op->lock); - - /* NOTE: for I/O operations we handle releasing the op - * object except in the case of timeout. 
the reason we - * can't free the op in timeout cases is that the op - * service logic in the vfs retries operations using - * the same op ptr, thus it can't be freed. - */ - if (!timed_out) - op_release(op); } else { /* * tell the vfs op waiting on a waitqueue that @@ -468,11 +465,22 @@ wakeup: * notification */ spin_lock(&op->lock); - set_op_state_serviced(op); + if (!op_state_given_up(op)) + set_op_state_serviced(op); spin_unlock(&op->lock); } out: + op_release(op); return ret; + +Broken: + spin_lock(&op->lock); + if (!op_state_given_up(op)) { + op->downcall.status = ret; + set_op_state_serviced(op); + } + spin_unlock(&op->lock); + goto out; } /* Returns whether any FS are still pending remounted */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 171013ae0036..df3404ba60af 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -105,10 +105,9 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod ssize_t ret; new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); - if (!new_op) { - ret = -ENOMEM; - goto out; - } + if (!new_op) + return -ENOMEM; + /* synchronous I/O */ new_op->upcall.req.io.async_vfs_io = ORANGEFS_VFS_SYNC_IO; new_op->upcall.req.io.readahead_size = readahead_size; @@ -234,12 +233,9 @@ populate_shared_memory: /* * tell the device file owner waiting on I/O that this read has - * completed and it can return now. in this exact case, on - * wakeup the daemon will free the op, so we *cannot* touch it - * after this. + * completed and it can return now. 
*/ wake_up_daemon_for_return(new_op); - new_op = NULL; out: if (buffer_index >= 0) { @@ -249,10 +245,7 @@ out: __func__, handle, buffer_index); buffer_index = -1; } - if (new_op) { - op_release(new_op); - new_op = NULL; - } + op_release(new_op); return ret; } diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index dd4335ff8c10..adc3ab013fdf 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -120,7 +120,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) init_waitqueue_head(&new_op->waitq); init_waitqueue_head(&new_op->io_completion_waitq); - atomic_set(&new_op->ref_count, 0); + atomic_set(&new_op->ref_count, 1); orangefs_op_initialize(new_op); @@ -149,14 +149,13 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) return new_op; } -void op_release(struct orangefs_kernel_op_s *orangefs_op) +void __op_release(struct orangefs_kernel_op_s *orangefs_op) { if (orangefs_op) { gossip_debug(GOSSIP_CACHE_DEBUG, "Releasing OP (%p: %llu)\n", orangefs_op, llu(orangefs_op->tag)); - orangefs_op_initialize(orangefs_op); kmem_cache_free(op_cache, orangefs_op); } else { gossip_err("NULL pointer in op_release\n"); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 4219b2f9a5ae..f96ec3da6b00 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -94,6 +94,7 @@ sizeof(__u64) + sizeof(struct orangefs_downcall_s)) * serviced - op has matching downcall; ok * purged - op has to start a timer since client-core * exited uncleanly before servicing op + * given up - submitter has given up waiting for it */ enum orangefs_vfs_op_states { OP_VFS_STATE_UNKNOWN = 0, @@ -101,30 +102,9 @@ enum orangefs_vfs_op_states { OP_VFS_STATE_INPROGR = 2, OP_VFS_STATE_SERVICED = 4, OP_VFS_STATE_PURGED = 8, + OP_VFS_STATE_GIVEN_UP = 16, }; -#define get_op(op) \ - do { \ - atomic_inc(&(op)->ref_count); \ - gossip_debug(GOSSIP_DEV_DEBUG, \ - "(get) Alloced OP (%p:%llu)\n", \ - op, \ - llu((op)->tag)); \ - 
} while (0) - -#define put_op(op) \ - do { \ - if (atomic_sub_and_test(1, &(op)->ref_count) == 1) { \ - gossip_debug(GOSSIP_DEV_DEBUG, \ - "(put) Releasing OP (%p:%llu)\n", \ - op, \ - llu((op)->tag)); \ - op_release(op); \ - } \ - } while (0) - -#define op_wait(op) (atomic_read(&(op)->ref_count) <= 2 ? 0 : 1) - /* * Defines for controlling whether I/O upcalls are for async or sync operations */ @@ -258,6 +238,25 @@ static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) #define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) #define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) #define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) +#define op_state_given_up(op) ((op)->op_state & OP_VFS_STATE_GIVEN_UP) + +static inline void get_op(struct orangefs_kernel_op_s *op) +{ + atomic_inc(&op->ref_count); + gossip_debug(GOSSIP_DEV_DEBUG, + "(get) Alloced OP (%p:%llu)\n", op, llu(op->tag)); +} + +void __op_release(struct orangefs_kernel_op_s *op); + +static inline void op_release(struct orangefs_kernel_op_s *op) +{ + if (atomic_dec_and_test(&op->ref_count)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "(put) Releasing OP (%p:%llu)\n", op, llu((op)->tag)); + __op_release(op); + } +} /* per inode private orangefs info */ struct orangefs_inode_s { @@ -459,7 +458,6 @@ int op_cache_initialize(void); int op_cache_finalize(void); struct orangefs_kernel_op_s *op_alloc(__s32 type); char *get_opname_string(struct orangefs_kernel_op_s *new_op); -void op_release(struct orangefs_kernel_op_s *op); int dev_req_cache_initialize(void); int dev_req_cache_finalize(void); @@ -665,11 +663,9 @@ int service_operation(struct orangefs_kernel_op_s *op, do { \ if (!op_state_serviced(new_op)) { \ orangefs_cancel_op_in_progress(new_op->tag); \ - op_release(new_op); \ } else { \ wake_up_daemon_for_return(new_op); \ } \ - new_op = NULL; \ orangefs_bufmap_put(bufmap, buffer_index); \ buffer_index = -1; \ } while (0) diff --git 
a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index bd9fbfe2ccee..e07874e26372 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -271,6 +271,7 @@ void purge_inprogress_ops(void) struct orangefs_kernel_op_s *op; struct orangefs_kernel_op_s *next; + spin_lock(&htable_ops_in_progress_lock); list_for_each_entry_safe(op, next, &htable_ops_in_progress[i], @@ -284,6 +285,7 @@ void purge_inprogress_ops(void) set_op_state_purged(op); spin_unlock(&op->lock); } + spin_unlock(&htable_ops_in_progress_lock); } } diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 3d360383ea22..83f4053bd11b 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -773,10 +773,8 @@ static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr) op_alloc_type = ORANGEFS_VFS_OP_PERF_COUNT; new_op = op_alloc(op_alloc_type); - if (!new_op) { - rc = -ENOMEM; - goto out; - } + if (!new_op) + return -ENOMEM; /* Can't do a service_operation if the client is not running... */ rc = is_daemon_in_service(); @@ -931,11 +929,7 @@ out: } } - /* - * if we got ENOMEM, then op_alloc probably failed... - */ - if (rc != -ENOMEM) - op_release(new_op); + op_release(new_op); return rc; @@ -1039,10 +1033,8 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) kobj_id); new_op = op_alloc(ORANGEFS_VFS_OP_PARAM); - if (!new_op) { - rc = -ENOMEM; - goto out; - } + if (!new_op) + return -EINVAL; /* sic */ /* Can't do a service_operation if the client is not running... */ rc = is_daemon_in_service(); @@ -1269,15 +1261,9 @@ static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr) } out: - /* - * if we got ENOMEM, then op_alloc probably failed... 
- */ - if (rc == -ENOMEM) - rc = 0; - else - op_release(new_op); + op_release(new_op); - if (rc == 0) + if (rc == -ENOMEM || rc == 0) rc = -EINVAL; return rc; diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index f21233201ce3..a6117787ee8d 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -429,19 +429,15 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) ret = copy_attributes_from_inode(inode, &new_op->upcall.req.setattr.attributes, iattr); - if (ret < 0) { - op_release(new_op); - return ret; - } - - ret = service_operation(new_op, __func__, + if (ret >= 0) { + ret = service_operation(new_op, __func__, get_interruptible_flag(inode)); - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_inode_setattr: returning %d\n", - ret); + gossip_debug(GOSSIP_UTILS_DEBUG, + "orangefs_inode_setattr: returning %d\n", + ret); + } - /* when request is serviced properly, free req op struct */ op_release(new_op); /* diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index a257891dd3ea..2e9468f57981 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -279,25 +279,6 @@ retry_servicing: return ret; } -static inline void remove_op_from_request_list(struct orangefs_kernel_op_s *op) -{ - struct list_head *tmp = NULL; - struct list_head *tmp_safe = NULL; - struct orangefs_kernel_op_s *tmp_op = NULL; - - spin_lock(&orangefs_request_list_lock); - list_for_each_safe(tmp, tmp_safe, &orangefs_request_list) { - tmp_op = list_entry(tmp, - struct orangefs_kernel_op_s, - list); - if (tmp_op && (tmp_op == op)) { - list_del(&tmp_op->list); - break; - } - } - spin_unlock(&orangefs_request_list_lock); -} - static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* @@ -334,6 +315,7 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s } spin_lock(&op->lock); + op->op_state |= OP_VFS_STATE_GIVEN_UP; if (op_state_waiting(op)) { /* @@ -341,7 +323,9 
@@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * list. */ spin_unlock(&op->lock); - remove_op_from_request_list(op); + spin_lock(&orangefs_request_list_lock); + list_del(&op->list); + spin_unlock(&orangefs_request_list_lock); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from request_list\n", op); From e1056a9cc35c878b6615d0fc84d3f338c89a38fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 12:26:56 -0500 Subject: [PATCH 096/174] orangefs: remove cargo-culting spin_lock_irqsave() in service_operation() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 2e9468f57981..b8a2fcbcce64 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -86,9 +86,6 @@ int service_operation(struct orangefs_kernel_op_s *op, sigset_t orig_sigset; int ret = 0; - /* irqflags and wait_entry are only used IF the client-core aborts */ - unsigned long irqflags; - DEFINE_WAIT(wait_entry); op->upcall.tgid = current->tgid; @@ -230,11 +227,9 @@ retry_servicing: * let process sleep for a few seconds so shared * memory system can be initialized. */ - spin_lock_irqsave(&op->lock, irqflags); prepare_to_wait(&orangefs_bufmap_init_waitq, &wait_entry, TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&op->lock, irqflags); /* * Wait for orangefs_bufmap_initialize() to wake me up @@ -251,9 +246,7 @@ retry_servicing: "Is shared memory available? (%d).\n", orangefs_get_bufmap_init()); - spin_lock_irqsave(&op->lock, irqflags); finish_wait(&orangefs_bufmap_init_waitq, &wait_entry); - spin_unlock_irqrestore(&op->lock, irqflags); if (orangefs_get_bufmap_init() == 0) { gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. 
Aborting user's request(%s).\n", From 70c6ea26ff2d2df420d573f8f0f22853336c0b56 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 13:04:19 -0500 Subject: [PATCH 097/174] orangefs: reduce nesting in wait_for_matching_downcall() reorder if branches... Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 120 ++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 61 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index b8a2fcbcce64..8c07a070e2b6 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -376,79 +376,77 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) } spin_unlock(&op->lock); - if (!signal_pending(current)) { + if (unlikely(signal_pending(current))) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation interrupted by a signal (tag " + "%llu, op %p)\n", + __func__, + llu(op->tag), + op); + orangefs_clean_up_interrupted_operation(op); + ret = -EINTR; + break; + } + + /* + * if this was our first attempt and client-core + * has not purged our operation, we are happy to + * simply wait + */ + spin_lock(&op->lock); + if (op->attempts == 0 && !op_state_purged(op)) { + spin_unlock(&op->lock); + schedule(); + } else { + spin_unlock(&op->lock); /* - * if this was our first attempt and client-core - * has not purged our operation, we are happy to - * simply wait + * subsequent attempts, we retry exactly once + * with timeouts */ - spin_lock(&op->lock); - if (op->attempts == 0 && !op_state_purged(op)) { - spin_unlock(&op->lock); - schedule(); - } else { - spin_unlock(&op->lock); - /* - * subsequent attempts, we retry exactly once - * with timeouts - */ - if (!schedule_timeout(MSECS_TO_JIFFIES - (1000 * op_timeout_secs))) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation timed out (tag" - " %llu, %p, att %d)\n", - __func__, - llu(op->tag), - op, - op->attempts); - ret = -ETIMEDOUT; - orangefs_clean_up_interrupted_operation 
- (op); - break; - } - } - spin_lock(&op->lock); - op->attempts++; - /* - * if the operation was purged in the meantime, it - * is better to requeue it afresh but ensure that - * we have not been purged repeatedly. This could - * happen if client-core crashes when an op - * is being serviced, so we requeue the op, client - * core crashes again so we requeue the op, client - * core starts, and so on... - */ - if (op_state_purged(op)) { - ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? - -EAGAIN : - -EIO; - spin_unlock(&op->lock); + if (!schedule_timeout(MSECS_TO_JIFFIES + (1000 * op_timeout_secs))) { gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s:" - " operation purged (tag " - "%llu, %p, att %d)\n", + " operation timed out (tag" + " %llu, %p, att %d)\n", __func__, llu(op->tag), op, op->attempts); + ret = -ETIMEDOUT; orangefs_clean_up_interrupted_operation(op); break; } - spin_unlock(&op->lock); - continue; } - - gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation interrupted by a signal (tag " - "%llu, op %p)\n", - __func__, - llu(op->tag), - op); - orangefs_clean_up_interrupted_operation(op); - ret = -EINTR; - break; + spin_lock(&op->lock); + op->attempts++; + /* + * if the operation was purged in the meantime, it + * is better to requeue it afresh but ensure that + * we have not been purged repeatedly. This could + * happen if client-core crashes when an op + * is being serviced, so we requeue the op, client + * core crashes again so we requeue the op, client + * core starts, and so on... + */ + if (op_state_purged(op)) { + ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? 
+ -EAGAIN : + -EIO; + spin_unlock(&op->lock); + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation purged (tag " + "%llu, %p, att %d)\n", + __func__, + llu(op->tag), + op, + op->attempts); + orangefs_clean_up_interrupted_operation(op); + break; + } + spin_unlock(&op->lock); } spin_lock(&op->lock); From eab9b38939fae1b7731570478718a5d1b2f28ea9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 13:09:05 -0500 Subject: [PATCH 098/174] orangefs_clean_up_interrupted_operation: call with op->lock held Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 8c07a070e2b6..699ffd8b2a51 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -279,35 +279,8 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * the interruption is detected. there is a coarse grained lock * across the operation. * - * NOTE: be sure not to reverse lock ordering by locking an op lock - * while holding the request_list lock. Here, we first lock the op - * and then lock the appropriate list. + * Called with op->lock held. 
*/ - if (!op) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s: op is null, ignoring\n", - __func__); - return; - } - - /* - * one more sanity check, make sure it's in one of the possible states - * or don't try to cancel it - */ - if (!(op_state_waiting(op) || - op_state_in_progress(op) || - op_state_serviced(op) || - op_state_purged(op))) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s: op %p not in a valid state (%0x), " - "ignoring\n", - __func__, - op, - op->op_state); - return; - } - - spin_lock(&op->lock); op->op_state |= OP_VFS_STATE_GIVEN_UP; if (op_state_waiting(op)) { @@ -374,7 +347,6 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) ret = 0; break; } - spin_unlock(&op->lock); if (unlikely(signal_pending(current))) { gossip_debug(GOSSIP_WAIT_DEBUG, @@ -394,7 +366,6 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) * has not purged our operation, we are happy to * simply wait */ - spin_lock(&op->lock); if (op->attempts == 0 && !op_state_purged(op)) { spin_unlock(&op->lock); schedule(); @@ -415,6 +386,7 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) op, op->attempts); ret = -ETIMEDOUT; + spin_lock(&op->lock); orangefs_clean_up_interrupted_operation(op); break; } @@ -434,7 +406,6 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? 
-EAGAIN : -EIO; - spin_unlock(&op->lock); gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s:" " operation purged (tag " @@ -481,7 +452,6 @@ static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) ret = 0; break; } - spin_unlock(&op->lock); if (signal_pending(current)) { gossip_debug(GOSSIP_WAIT_DEBUG, @@ -498,6 +468,7 @@ static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) gossip_debug(GOSSIP_WAIT_DEBUG, "%s:About to call schedule_timeout.\n", __func__); + spin_unlock(&op->lock); ret = schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs)); @@ -510,6 +481,7 @@ static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) "%s:*** operation timed out: %p\n", __func__, op); + spin_lock(&op->lock); orangefs_clean_up_interrupted_operation(op); ret = -ETIMEDOUT; break; From 727cbfea623b78d46ce8e0f8c931b5189f3fe2e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 13:17:55 -0500 Subject: [PATCH 099/174] orangefs: get rid of MSECS_TO_JIFFIES All timeouts are in _seconds_, so all calls are of form MSECS_TO_JIFFIES(n * 1000), which is a convoluted way to spell n * HZ. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 4 +--- fs/orangefs/orangefs-bufmap.c | 4 +--- fs/orangefs/orangefs-kernel.h | 5 ----- fs/orangefs/waitqueue.c | 10 ++++------ 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b7a6aa44ce3e..d8c436a0aa1b 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -435,9 +435,7 @@ wakeup: spin_unlock(&op->lock); if (!signal_pending(current)) { - int timeout = - MSECS_TO_JIFFIES(1000 * - op_timeout_secs); + int timeout = op_timeout_secs * HZ; if (!schedule_timeout(timeout)) { gossip_debug(GOSSIP_DEV_DEBUG, "%s: timed out.\n", diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 15baecb8094d..c60019de1fd8 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -377,13 +377,11 @@ static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) break; if (!signal_pending(current)) { - int timeout = - MSECS_TO_JIFFIES(1000 * slot_timeout_secs); gossip_debug(GOSSIP_BUFMAP_DEBUG, "[BUFMAP]: waiting %d " "seconds for a slot\n", slot_timeout_secs); - if (!schedule_timeout(timeout)) { + if (!schedule_timeout(slot_timeout_secs * HZ)) { gossip_debug(GOSSIP_BUFMAP_DEBUG, "*** wait_for_a_slot timed out\n"); ret = -ETIMEDOUT; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index f96ec3da6b00..2b72806d0f68 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -80,11 +80,6 @@ sizeof(__u64) + sizeof(struct orangefs_upcall_s)) #define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \ sizeof(__u64) + sizeof(struct orangefs_downcall_s)) -/* borrowed from irda.h */ -#ifndef MSECS_TO_JIFFIES -#define MSECS_TO_JIFFIES(ms) (((ms)*HZ+999)/1000) -#endif - /* * valid orangefs kernel operation states * diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 699ffd8b2a51..cdbf57bef3eb 
100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -235,8 +235,8 @@ retry_servicing: * Wait for orangefs_bufmap_initialize() to wake me up * within the allotted time. */ - ret = schedule_timeout(MSECS_TO_JIFFIES - (1000 * ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS)); + ret = schedule_timeout( + ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ); gossip_debug(GOSSIP_WAIT_DEBUG, "Value returned from schedule_timeout:" @@ -375,8 +375,7 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) * subsequent attempts, we retry exactly once * with timeouts */ - if (!schedule_timeout(MSECS_TO_JIFFIES - (1000 * op_timeout_secs))) { + if (!schedule_timeout(op_timeout_secs * HZ)) { gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s:" " operation timed out (tag" @@ -469,8 +468,7 @@ static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) "%s:About to call schedule_timeout.\n", __func__); spin_unlock(&op->lock); - ret = - schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs)); + ret = schedule_timeout(op_timeout_secs * HZ); gossip_debug(GOSSIP_WAIT_DEBUG, "%s:Value returned from schedule_timeout(%d).\n", From 4f55e39732ad0bd05d70c88e174e747d55e3685c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 13:27:50 -0500 Subject: [PATCH 100/174] if ORANGEFS_VFS_OP_FILE_IO request had been given up, don't bother waiting ... we are not going to get woken up anyway, so it's just going to time out and whine. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index d8c436a0aa1b..3879f2b7cf29 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -418,8 +418,11 @@ wakeup: * that this op is done */ spin_lock(&op->lock); - if (!op_state_given_up(op)) - set_op_state_serviced(op); + if (unlikely(op_state_given_up(op))) { + spin_unlock(&op->lock); + goto out; + } + set_op_state_serviced(op); spin_unlock(&op->lock); while (1) { @@ -433,22 +436,19 @@ wakeup: break; } spin_unlock(&op->lock); - - if (!signal_pending(current)) { - int timeout = op_timeout_secs * HZ; - if (!schedule_timeout(timeout)) { - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: timed out.\n", - __func__); - break; - } - continue; + if (unlikely(signal_pending(current))) { + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: signal on I/O wait, aborting\n", + __func__); + break; } - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: signal on I/O wait, aborting\n", - __func__); - break; + if (!schedule_timeout(op_timeout_secs * HZ)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: timed out.\n", + __func__); + break; + } } spin_lock(&op->lock); From 2a9e5c22605f5db6040535b10dce5fbc3a7db3bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 13:45:46 -0500 Subject: [PATCH 101/174] orangefs: don't reinvent completion.h... 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 75 ++++++++++------------------------- fs/orangefs/file.c | 5 +-- fs/orangefs/orangefs-cache.c | 1 - fs/orangefs/orangefs-kernel.h | 3 +- fs/orangefs/orangefs-utils.c | 2 +- 5 files changed, 24 insertions(+), 62 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 3879f2b7cf29..812844faa7f5 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -398,6 +398,17 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, } wakeup: + /* + * tell the vfs op waiting on a waitqueue + * that this op is done + */ + spin_lock(&op->lock); + if (unlikely(op_state_given_up(op))) { + spin_unlock(&op->lock); + goto out; + } + set_op_state_serviced(op); + spin_unlock(&op->lock); /* * If this operation is an I/O operation we need to wait @@ -411,61 +422,17 @@ wakeup: * the buffers are done being used. */ if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) { - DEFINE_WAIT(wait_entry); - - /* - * tell the vfs op waiting on a waitqueue - * that this op is done - */ - spin_lock(&op->lock); - if (unlikely(op_state_given_up(op))) { - spin_unlock(&op->lock); - goto out; + long n = wait_for_completion_interruptible_timeout(&op->done, + op_timeout_secs * HZ); + if (unlikely(n < 0)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: signal on I/O wait, aborting\n", + __func__); + } else if (unlikely(n == 0)) { + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: timed out.\n", + __func__); } - set_op_state_serviced(op); - spin_unlock(&op->lock); - - while (1) { - spin_lock(&op->lock); - prepare_to_wait_exclusive( - &op->io_completion_waitq, - &wait_entry, - TASK_INTERRUPTIBLE); - if (op->io_completed) { - spin_unlock(&op->lock); - break; - } - spin_unlock(&op->lock); - if (unlikely(signal_pending(current))) { - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: signal on I/O wait, aborting\n", - __func__); - break; - } - - if (!schedule_timeout(op_timeout_secs * HZ)) { 
- gossip_debug(GOSSIP_DEV_DEBUG, - "%s: timed out.\n", - __func__); - break; - } - } - - spin_lock(&op->lock); - finish_wait(&op->io_completion_waitq, &wait_entry); - spin_unlock(&op->lock); - } else { - /* - * tell the vfs op waiting on a waitqueue that - * this op is done - - * for every other operation (i.e. non-I/O), we need to - * wake up the callers for downcall completion - * notification - */ - spin_lock(&op->lock); - if (!op_state_given_up(op)) - set_op_state_serviced(op); - spin_unlock(&op->lock); } out: op_release(op); diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index df3404ba60af..7af0adba29aa 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -16,10 +16,7 @@ #define wake_up_daemon_for_return(op) \ do { \ - spin_lock(&op->lock); \ - op->io_completed = 1; \ - spin_unlock(&op->lock); \ - wake_up_interruptible(&op->io_completion_waitq);\ + complete(&op->done); \ } while (0) /* diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index adc3ab013fdf..90c11a0daf74 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -119,7 +119,6 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) spin_lock_init(&new_op->lock); init_waitqueue_head(&new_op->waitq); - init_waitqueue_head(&new_op->io_completion_waitq); atomic_set(&new_op->ref_count, 1); orangefs_op_initialize(new_op); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 2b72806d0f68..58e523c23637 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -201,8 +201,7 @@ struct orangefs_kernel_op_s { wait_queue_head_t waitq; spinlock_t lock; - int io_completed; - wait_queue_head_t io_completion_waitq; + struct completion done; atomic_t ref_count; diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index a6117787ee8d..ca7edcfae873 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -597,7 +597,7 @@ void orangefs_op_initialize(struct 
orangefs_kernel_op_s *op) { if (op) { spin_lock(&op->lock); - op->io_completed = 0; + init_completion(&op->done); op->upcall.type = ORANGEFS_VFS_OP_INVALID; op->downcall.type = ORANGEFS_VFS_OP_INVALID; From b0bc3a7b621cb8d7bcce507f323249a7340f4141 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 13:50:37 -0500 Subject: [PATCH 102/174] orangefs: move handle_io_error() to file.c Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 47 ++++++++++++++++++++++++++++++----- fs/orangefs/orangefs-kernel.h | 40 ----------------------------- 2 files changed, 41 insertions(+), 46 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 7af0adba29aa..c585063d1100 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -14,11 +14,6 @@ #include #include -#define wake_up_daemon_for_return(op) \ -do { \ - complete(&op->done); \ -} while (0) - /* * Copy to client-core's address space from the buffers specified * by the iovec upto total_size bytes. @@ -87,6 +82,46 @@ static int postcopy_buffers(struct orangefs_bufmap *bufmap, return ret; } +/* + * handles two possible error cases, depending on context. + * + * by design, our vfs i/o errors need to be handled in one of two ways, + * depending on where the error occured. + * + * if the error happens in the waitqueue code because we either timed + * out or a signal was raised while waiting, we need to cancel the + * userspace i/o operation and free the op manually. this is done to + * avoid having the device start writing application data to our shared + * bufmap pages without us expecting it. + * + * FIXME: POSSIBLE OPTIMIZATION: + * However, if we timed out or if we got a signal AND our upcall was never + * picked off the queue (i.e. we were in OP_VFS_STATE_WAITING), then we don't + * need to send a cancellation upcall. 
The way we can handle this is + * set error_exit to 2 in such cases and 1 whenever cancellation has to be + * sent and have handle_error + * take care of this situation as well.. + * + * if a orangefs sysint level error occured and i/o has been completed, + * there is no need to cancel the operation, as the user has finished + * using the bufmap page and so there is no danger in this case. in + * this case, we wake up the device normally so that it may free the + * op, as normal. + * + * note the only reason this is a macro is because both read and write + * cases need the exact same handling code. + */ +#define handle_io_error() \ +do { \ + if (!op_state_serviced(new_op)) { \ + orangefs_cancel_op_in_progress(new_op->tag); \ + } else { \ + complete(&new_op->done); \ + } \ + orangefs_bufmap_put(bufmap, buffer_index); \ + buffer_index = -1; \ +} while (0) + /* * Post and wait for the I/O upcall to finish */ @@ -232,7 +267,7 @@ populate_shared_memory: * tell the device file owner waiting on I/O that this read has * completed and it can return now. */ - wake_up_daemon_for_return(new_op); + complete(&new_op->done); out: if (buffer_index >= 0) { diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 58e523c23637..e11fc67d7773 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -624,46 +624,6 @@ int service_operation(struct orangefs_kernel_op_s *op, const char *op_name, int flags); -/* - * handles two possible error cases, depending on context. - * - * by design, our vfs i/o errors need to be handled in one of two ways, - * depending on where the error occured. - * - * if the error happens in the waitqueue code because we either timed - * out or a signal was raised while waiting, we need to cancel the - * userspace i/o operation and free the op manually. this is done to - * avoid having the device start writing application data to our shared - * bufmap pages without us expecting it. 
- * - * FIXME: POSSIBLE OPTIMIZATION: - * However, if we timed out or if we got a signal AND our upcall was never - * picked off the queue (i.e. we were in OP_VFS_STATE_WAITING), then we don't - * need to send a cancellation upcall. The way we can handle this is - * set error_exit to 2 in such cases and 1 whenever cancellation has to be - * sent and have handle_error - * take care of this situation as well.. - * - * if a orangefs sysint level error occured and i/o has been completed, - * there is no need to cancel the operation, as the user has finished - * using the bufmap page and so there is no danger in this case. in - * this case, we wake up the device normally so that it may free the - * op, as normal. - * - * note the only reason this is a macro is because both read and write - * cases need the exact same handling code. - */ -#define handle_io_error() \ -do { \ - if (!op_state_serviced(new_op)) { \ - orangefs_cancel_op_in_progress(new_op->tag); \ - } else { \ - wake_up_daemon_for_return(new_op); \ - } \ - orangefs_bufmap_put(bufmap, buffer_index); \ - buffer_index = -1; \ -} while (0) - #define get_interruptible_flag(inode) \ ((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? \ ORANGEFS_OP_INTERRUPTIBLE : 0) From 115b93a8595c878759c7c1fdbd95fbbeacbe9168 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2016 14:04:31 -0500 Subject: [PATCH 103/174] orangefs: clean up op_alloc() fold orangefs_op_initialize() in there, don't bother locking something nobody else could've seen yet, use kmem_cache_zalloc() instead of explicit memset()... 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-cache.c | 13 +++++++++---- fs/orangefs/orangefs-kernel.h | 2 -- fs/orangefs/orangefs-utils.c | 16 ---------------- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 90c11a0daf74..e72ac2083ac0 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -111,17 +111,22 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) { struct orangefs_kernel_op_s *new_op = NULL; - new_op = kmem_cache_alloc(op_cache, ORANGEFS_CACHE_ALLOC_FLAGS); + new_op = kmem_cache_zalloc(op_cache, ORANGEFS_CACHE_ALLOC_FLAGS); if (new_op) { - memset(new_op, 0, sizeof(struct orangefs_kernel_op_s)); - INIT_LIST_HEAD(&new_op->list); spin_lock_init(&new_op->lock); init_waitqueue_head(&new_op->waitq); atomic_set(&new_op->ref_count, 1); - orangefs_op_initialize(new_op); + init_completion(&new_op->done); + + new_op->upcall.type = ORANGEFS_VFS_OP_INVALID; + new_op->downcall.type = ORANGEFS_VFS_OP_INVALID; + new_op->downcall.status = -1; + + new_op->op_state = OP_VFS_STATE_UNKNOWN; + new_op->tag = 0; /* initialize the op specific tag and upcall credentials */ spin_lock(&next_tag_value_lock); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index e11fc67d7773..9c876762f825 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -565,8 +565,6 @@ int orangefs_inode_getattr(struct inode *inode, __u32 mask); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); -void orangefs_op_initialize(struct orangefs_kernel_op_s *op); - void orangefs_make_bad_inode(struct inode *inode); void orangefs_block_signals(sigset_t *); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index ca7edcfae873..92a38b0091f2 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -593,22 +593,6 @@ int orangefs_cancel_op_in_progress(__u64 tag) return 
ret; } -void orangefs_op_initialize(struct orangefs_kernel_op_s *op) -{ - if (op) { - spin_lock(&op->lock); - init_completion(&op->done); - - op->upcall.type = ORANGEFS_VFS_OP_INVALID; - op->downcall.type = ORANGEFS_VFS_OP_INVALID; - op->downcall.status = -1; - - op->op_state = OP_VFS_STATE_UNKNOWN; - op->tag = 0; - spin_unlock(&op->lock); - } -} - void orangefs_make_bad_inode(struct inode *inode) { if (is_root_handle(inode)) { From 394f647e3ad073dab19ba081501e4a0ca05302c4 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 25 Jan 2016 15:33:39 -0500 Subject: [PATCH 104/174] orangefs: Util functions shouldn't operate on inode where it can be avoided. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-utils.c | 82 +++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 92a38b0091f2..035f050ae0e8 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -78,24 +78,55 @@ __s32 fsid_of_op(struct orangefs_kernel_op_s *op) return fsid; } -static void orangefs_set_inode_flags(struct inode *inode, - struct ORANGEFS_sys_attr_s *attrs) +static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs) { + int flags = 0; if (attrs->flags & ORANGEFS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + flags |= S_IMMUTABLE; else - inode->i_flags &= ~S_IMMUTABLE; - + flags &= ~S_IMMUTABLE; if (attrs->flags & ORANGEFS_APPEND_FL) - inode->i_flags |= S_APPEND; + flags |= S_APPEND; else - inode->i_flags &= ~S_APPEND; - + flags &= ~S_APPEND; if (attrs->flags & ORANGEFS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + flags |= S_NOATIME; else - inode->i_flags &= ~S_NOATIME; + flags &= ~S_NOATIME; + return flags; +} +static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) +{ + int perm_mode = 0; + + if (attrs->perms & ORANGEFS_O_EXECUTE) + perm_mode |= S_IXOTH; + if (attrs->perms & ORANGEFS_O_WRITE) + 
perm_mode |= S_IWOTH; + if (attrs->perms & ORANGEFS_O_READ) + perm_mode |= S_IROTH; + + if (attrs->perms & ORANGEFS_G_EXECUTE) + perm_mode |= S_IXGRP; + if (attrs->perms & ORANGEFS_G_WRITE) + perm_mode |= S_IWGRP; + if (attrs->perms & ORANGEFS_G_READ) + perm_mode |= S_IRGRP; + + if (attrs->perms & ORANGEFS_U_EXECUTE) + perm_mode |= S_IXUSR; + if (attrs->perms & ORANGEFS_U_WRITE) + perm_mode |= S_IWUSR; + if (attrs->perms & ORANGEFS_U_READ) + perm_mode |= S_IRUSR; + + if (attrs->perms & ORANGEFS_G_SGID) + perm_mode |= S_ISGID; + if (attrs->perms & ORANGEFS_U_SUID) + perm_mode |= S_ISUID; + + return perm_mode; } /* NOTE: symname is ignored unless the inode is a sym link */ @@ -104,7 +135,6 @@ static int copy_attributes_to_inode(struct inode *inode, char *symname) { int ret = -1; - int perm_mode = 0; struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); loff_t inode_size = 0; loff_t rounded_up_size = 0; @@ -134,7 +164,7 @@ static int copy_attributes_to_inode(struct inode *inode, switch (attrs->objtype) { case ORANGEFS_TYPE_METAFILE: - orangefs_set_inode_flags(inode, attrs); + inode->i_flags = orangefs_inode_flags(attrs); if (attrs->mask & ORANGEFS_ATTR_SYS_SIZE) { inode_size = (loff_t) attrs->size; rounded_up_size = @@ -179,33 +209,7 @@ static int copy_attributes_to_inode(struct inode *inode, inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - if (attrs->perms & ORANGEFS_O_EXECUTE) - perm_mode |= S_IXOTH; - if (attrs->perms & ORANGEFS_O_WRITE) - perm_mode |= S_IWOTH; - if (attrs->perms & ORANGEFS_O_READ) - perm_mode |= S_IROTH; - - if (attrs->perms & ORANGEFS_G_EXECUTE) - perm_mode |= S_IXGRP; - if (attrs->perms & ORANGEFS_G_WRITE) - perm_mode |= S_IWGRP; - if (attrs->perms & ORANGEFS_G_READ) - perm_mode |= S_IRGRP; - - if (attrs->perms & ORANGEFS_U_EXECUTE) - perm_mode |= S_IXUSR; - if (attrs->perms & ORANGEFS_U_WRITE) - perm_mode |= S_IWUSR; - if (attrs->perms & ORANGEFS_U_READ) - perm_mode |= S_IRUSR; - - if (attrs->perms & ORANGEFS_G_SGID) - perm_mode 
|= S_ISGID; - if (attrs->perms & ORANGEFS_U_SUID) - perm_mode |= S_ISUID; - - inode->i_mode = perm_mode; + inode->i_mode = orangefs_inode_perms(attrs); if (is_root_handle(inode)) { /* special case: mark the root inode as sticky */ From 99109822f5cbe6d530eb55193b25aa5348f6134d Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 28 Jan 2016 10:19:40 -0500 Subject: [PATCH 105/174] orangefs: Fix revalidate. Previously, it would update a live inode. This was fixed, but it did not ever check that the inode attributes in the dcache are correct. This checks all inode attributes and rejects any that are not correct, which causes a lookup and thus a new getattr. Perhaps inode_operations->permission should replace or augment some of this. There is no actual caching, and this does a rather excessive amount of network operations back to the filesystem server. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 98 ++++++++++++++--------- fs/orangefs/file.c | 4 +- fs/orangefs/inode.c | 6 +- fs/orangefs/orangefs-kernel.h | 2 +- fs/orangefs/orangefs-utils.c | 141 +++++++++++++++++++++++++++++----- 5 files changed, 187 insertions(+), 64 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 0419981f773e..e8fb79de37c6 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -43,24 +43,34 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) err = service_operation(new_op, "orangefs_lookup", get_interruptible_flag(parent_inode)); - if (err) - goto out_drop; - if (new_op->downcall.status != 0 || - !match_handle(new_op->downcall.resp.lookup.refn.khandle, inode)) { - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s:%s:%d " - "lookup failure |%s| or no match |%s|.\n", - __FILE__, - __func__, - __LINE__, - new_op->downcall.status ? "true" : "false", - match_handle(new_op->downcall.resp.lookup.refn.khandle, - inode) ? 
"false" : "true"); - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s:%s:%d revalidate failed\n", - __FILE__, __func__, __LINE__); - goto out_drop; + /* Positive dentry: reject if error or not the same inode. */ + if (inode) { + if (err) { + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s:%s:%d lookup failure.\n", + __FILE__, __func__, __LINE__); + goto out_drop; + } + if (!match_handle(new_op->downcall.resp.lookup.refn.khandle, + inode)) { + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s:%s:%d no match.\n", + __FILE__, __func__, __LINE__); + goto out_drop; + } + + /* Negative dentry: reject if success or error other than ENOENT. */ + } else { + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n", + __func__); + if (!err || err != -ENOENT) { + if (new_op->downcall.status != 0) + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s:%s:%d lookup failure.\n", + __FILE__, __func__, __LINE__); + goto out_drop; + } } ret = 1; @@ -70,6 +80,8 @@ out_put_parent: dput(parent_dentry); return ret; out_drop: + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n", + __FILE__, __func__, __LINE__); d_drop(dentry); goto out_release_op; } @@ -81,8 +93,7 @@ out_drop: */ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct inode *inode; - int ret = 0; + int ret; if (flags & LOOKUP_RCU) return -ECHILD; @@ -90,29 +101,42 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n", __func__, dentry); - /* find inode from dentry */ - if (!dentry->d_inode) { - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s: negative dentry.\n", - __func__); - goto out; - } - - gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode valid.\n", __func__); - inode = dentry->d_inode; - /* skip root handle lookups. 
*/ - if (is_root_handle(inode)) { - ret = 1; - goto out; + if (dentry->d_inode && is_root_handle(dentry->d_inode)) + return 1; + + /* + * If this passes, the positive dentry still exists or the negative + * dentry still does not exist. + */ + if (!orangefs_revalidate_lookup(dentry)) { + d_drop(dentry); + return 0; } - /* lookup the object. */ - if (orangefs_revalidate_lookup(dentry)) - ret = 1; + /* We do not need to continue with negative dentries. */ + if (!dentry->d_inode) + goto out; + + /* Now we must perform a getattr to validate the inode contents. */ + ret = orangefs_inode_getattr(dentry->d_inode, + ORANGEFS_ATTR_SYS_ALL_NOHINT, 1); + if (ret < 0) { + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n", + __FILE__, __func__, __LINE__); + d_drop(dentry); + return 0; + } + if (ret == 0) { + d_drop(dentry); + return 0; + } out: - return ret; + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: negative dentry or positive dentry and inode valid.\n", + __func__); + return 1; } const struct dentry_operations orangefs_dentry_operations = { diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index c585063d1100..7e6fe8d8ab2b 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -467,7 +467,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite /* Make sure generic_write_checks sees an up to date inode size. */ if (file->f_flags & O_APPEND) { rc = orangefs_inode_getattr(file->f_mapping->host, - ORANGEFS_ATTR_SYS_SIZE); + ORANGEFS_ATTR_SYS_SIZE, 0); if (rc) { gossip_err("%s: orangefs_inode_getattr failed, rc:%zd:.\n", __func__, rc); @@ -681,7 +681,7 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) * NOTE: We are only interested in file size here, * so we set mask accordingly. 
*/ - ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE); + ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE, 0); if (ret) { gossip_debug(GOSSIP_FILE_DEBUG, "%s:%s:%d calling make bad inode\n", diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 4724c92b61ac..040cd95b51c2 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -273,7 +273,7 @@ int orangefs_getattr(struct vfsmount *mnt, * fields/attributes of the inode would be refreshed. So again, we * dont have too much of a choice but refresh all the attributes. */ - ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); + ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); if (ret == 0) { generic_fillattr(inode, kstat); /* override block size reported to stat */ @@ -392,7 +392,7 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref if (!inode || !(inode->i_state & I_NEW)) return inode; - error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); + error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); if (error) { iget_failed(inode); return ERR_PTR(error); @@ -437,7 +437,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ - error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT); + error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); if (error) goto out_iput; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 9c876762f825..3e258554688d 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -561,7 +561,7 @@ int orangefs_inode_setxattr(struct inode *inode, size_t size, int flags); -int orangefs_inode_getattr(struct inode *inode, __u32 mask); +int orangefs_inode_getattr(struct inode *inode, __u32 mask, int check); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); diff --git 
a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 035f050ae0e8..6cf29a439211 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -353,11 +353,91 @@ static inline int copy_attributes_from_inode(struct inode *inode, return 0; } +static int compare_attributes_to_inode(struct inode *inode, + struct ORANGEFS_sys_attr_s *attrs, + char *symname) +{ + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + loff_t inode_size, rounded_up_size; + + /* Compare file size. */ + + switch (attrs->objtype) { + case ORANGEFS_TYPE_METAFILE: + if(inode->i_flags != orangefs_inode_flags(attrs)) + return 0; + inode_size = attrs->size; + rounded_up_size = inode_size + (4096 - (inode_size % 4096)); + if (inode->i_bytes != inode_size || + inode->i_blocks != rounded_up_size/512) + return 0; + break; + case ORANGEFS_TYPE_SYMLINK: + if (symname && strlen(symname) != inode->i_size) + return 0; + break; + default: + if (inode->i_size != PAGE_CACHE_SIZE && + inode_get_bytes(inode) != PAGE_CACHE_SIZE) + return 0; + } + + /* Compare general attributes. */ + + if (!uid_eq(inode->i_uid, make_kuid(&init_user_ns, attrs->owner)) || + !gid_eq(inode->i_gid, make_kgid(&init_user_ns, attrs->group)) || + inode->i_atime.tv_sec != attrs->atime || + inode->i_mtime.tv_sec != attrs->mtime || + inode->i_ctime.tv_sec != attrs->ctime || + inode->i_atime.tv_nsec != 0 || + inode->i_mtime.tv_nsec != 0 || + inode->i_ctime.tv_nsec != 0) + return 0; + + if ((inode->i_mode & ~(S_ISVTX|S_IFREG|S_IFDIR|S_IFLNK)) != + orangefs_inode_perms(attrs)) + return 0; + + if (is_root_handle(inode)) + if (!(inode->i_mode & S_ISVTX)) + return 0; + + /* Compare file type. 
*/ + + switch (attrs->objtype) { + case ORANGEFS_TYPE_METAFILE: + if (!(inode->i_mode & S_IFREG)) + return 0; + break; + case ORANGEFS_TYPE_DIRECTORY: + if (!(inode->i_mode & S_IFDIR)) + return 0; + if (inode->i_nlink != 1) + return 0; + break; + case ORANGEFS_TYPE_SYMLINK: + if (!(inode->i_mode & S_IFLNK)) + return 0; + if (orangefs_inode && symname) + if (strcmp(orangefs_inode->link_target, symname)) + return 0; + break; + default: + gossip_err("orangefs: compare_attributes_to_inode: got invalid attribute type %x\n", + attrs->objtype); + + } + + return 1; +} + /* - * issues a orangefs getattr request and fills in the appropriate inode - * attributes if successful. returns 0 on success; -errno otherwise + * Issues a orangefs getattr request and fills in the appropriate inode + * attributes if successful. When check is 0, returns 0 on success and -errno + * otherwise. When check is 1, returns 1 on success where the inode is valid + * and 0 on success where the inode is stale and -errno otherwise. */ -int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask) +int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask, int check) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; @@ -379,27 +459,46 @@ int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask) if (ret != 0) goto out; - if (copy_attributes_to_inode(inode, - &new_op->downcall.resp.getattr.attributes, - new_op->downcall.resp.getattr.link_target)) { - gossip_err("%s: failed to copy attributes\n", __func__); - ret = -ENOENT; - goto out; - } + if (check) { + ret = compare_attributes_to_inode(inode, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target); - /* - * Store blksize in orangefs specific part of inode structure; we are - * only going to use this to report to stat to make sure it doesn't - * perturb any inode related code paths. 
- */ - if (new_op->downcall.resp.getattr.attributes.objtype == - ORANGEFS_TYPE_METAFILE) { - orangefs_inode->blksize = - new_op->downcall.resp.getattr.attributes.blksize; + if (new_op->downcall.resp.getattr.attributes.objtype == + ORANGEFS_TYPE_METAFILE) { + if (orangefs_inode->blksize != + new_op->downcall.resp.getattr.attributes.blksize) + ret = 0; + } else { + if (orangefs_inode->blksize != 1 << inode->i_blkbits) + ret = 0; + } } else { - /* mimic behavior of generic_fillattr() for other types. */ - orangefs_inode->blksize = (1 << inode->i_blkbits); + if (copy_attributes_to_inode(inode, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target)) { + gossip_err("%s: failed to copy attributes\n", __func__); + ret = -ENOENT; + goto out; + } + /* + * Store blksize in orangefs specific part of inode structure; + * we are only going to use this to report to stat to make sure + * it doesn't perturb any inode related code paths. + */ + if (new_op->downcall.resp.getattr.attributes.objtype == + ORANGEFS_TYPE_METAFILE) { + orangefs_inode->blksize = new_op->downcall.resp. + getattr.attributes.blksize; + } else { + /* + * mimic behavior of generic_fillattr() for other file + * types. + */ + orangefs_inode->blksize = (1 << inode->i_blkbits); + + } } out: From 5090c9670de03511834bc894cfc9737e3d61a414 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 4 Feb 2016 13:29:27 -0500 Subject: [PATCH 106/174] Orangefs: improve gossip statement There were two just alike, making it hard maybe to tell which one you were looking at in syslog... so I changed it a little by adding some extra interesting tidbits to it... 
Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 812844faa7f5..37278f5878b3 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -145,8 +145,11 @@ restart: ret = fs_mount_pending(fsid); if (ret == 1) { gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs: skipping op tag %llu %s\n", - llu(op->tag), get_opname_string(op)); + "%s: mount pending, skipping op tag " + "%llu %s\n", + __func__, + llu(op->tag), + get_opname_string(op)); spin_unlock(&op->lock); continue; /* From 2d4cae0d175acae2ea2efbc17b52b71d4ffd886d Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 4 Feb 2016 13:48:16 -0500 Subject: [PATCH 107/174] Orangefs: clean up slab allocation. A couple of caches were no longer needed: - iov_iter improvements to orangefs_devreq_write_iter eliminated the need for the dev_req_cache. - removal (months ago) of the old AIO code eliminated the need for the kiocb_cache. Also, stop hiding GFP_KERNEL behind the ORANGEFS_CACHE_ALLOC_FLAGS macro: pass GFP_KERNEL directly when calling kmem_cache_(z)alloc for the remaining caches.
Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-cache.c | 92 +---------------------------------- fs/orangefs/orangefs-kernel.h | 15 +----- fs/orangefs/orangefs-mod.c | 20 +------- fs/orangefs/super.c | 3 +- 4 files changed, 6 insertions(+), 124 deletions(-) diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index e72ac2083ac0..3b3de91406ca 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -16,12 +16,6 @@ static DEFINE_SPINLOCK(next_tag_value_lock); /* a cache for orangefs upcall/downcall operations */ static struct kmem_cache *op_cache; -/* a cache for device (/dev/pvfs2-req) communication */ -static struct kmem_cache *dev_req_cache; - -/* a cache for orangefs_kiocb objects (i.e orangefs iocb structures ) */ -static struct kmem_cache *orangefs_kiocb_cache; - int op_cache_initialize(void) { op_cache = kmem_cache_create("orangefs_op_cache", @@ -111,7 +105,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) { struct orangefs_kernel_op_s *new_op = NULL; - new_op = kmem_cache_zalloc(op_cache, ORANGEFS_CACHE_ALLOC_FLAGS); + new_op = kmem_cache_zalloc(op_cache, GFP_KERNEL); if (new_op) { INIT_LIST_HEAD(&new_op->list); spin_lock_init(&new_op->lock); @@ -148,7 +142,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) new_op->upcall.gid = from_kgid(current_user_ns(), current_fsgid()); } else { - gossip_err("op_alloc: kmem_cache_alloc failed!\n"); + gossip_err("op_alloc: kmem_cache_zalloc failed!\n"); } return new_op; } @@ -165,85 +159,3 @@ void __op_release(struct orangefs_kernel_op_s *orangefs_op) gossip_err("NULL pointer in op_release\n"); } } - -int dev_req_cache_initialize(void) -{ - dev_req_cache = kmem_cache_create("orangefs_devreqcache", - MAX_DEV_REQ_DOWNSIZE, - 0, - ORANGEFS_CACHE_CREATE_FLAGS, - NULL); - - if (!dev_req_cache) { - gossip_err("Cannot create orangefs_dev_req_cache\n"); - return -ENOMEM; - } - return 0; -} - -int dev_req_cache_finalize(void) -{ - kmem_cache_destroy(dev_req_cache); - return 
0; -} - -void *dev_req_alloc(void) -{ - void *buffer; - - buffer = kmem_cache_alloc(dev_req_cache, ORANGEFS_CACHE_ALLOC_FLAGS); - if (buffer == NULL) - gossip_err("Failed to allocate from dev_req_cache\n"); - else - memset(buffer, 0, sizeof(MAX_DEV_REQ_DOWNSIZE)); - return buffer; -} - -void dev_req_release(void *buffer) -{ - if (buffer) - kmem_cache_free(dev_req_cache, buffer); - else - gossip_err("NULL pointer passed to dev_req_release\n"); -} - -int kiocb_cache_initialize(void) -{ - orangefs_kiocb_cache = kmem_cache_create("orangefs_kiocbcache", - sizeof(struct orangefs_kiocb_s), - 0, - ORANGEFS_CACHE_CREATE_FLAGS, - NULL); - - if (!orangefs_kiocb_cache) { - gossip_err("Cannot create orangefs_kiocb_cache!\n"); - return -ENOMEM; - } - return 0; -} - -int kiocb_cache_finalize(void) -{ - kmem_cache_destroy(orangefs_kiocb_cache); - return 0; -} - -struct orangefs_kiocb_s *kiocb_alloc(void) -{ - struct orangefs_kiocb_s *x = NULL; - - x = kmem_cache_alloc(orangefs_kiocb_cache, ORANGEFS_CACHE_ALLOC_FLAGS); - if (x == NULL) - gossip_err("kiocb_alloc: kmem_cache_alloc failed!\n"); - else - memset(x, 0, sizeof(struct orangefs_kiocb_s)); - return x; -} - -void kiocb_release(struct orangefs_kiocb_s *x) -{ - if (x) - kmem_cache_free(orangefs_kiocb_cache, x); - else - gossip_err("kiocb_release: kmem_cache_free NULL pointer!\n"); -} diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 3e258554688d..d4db96223dac 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -128,7 +128,6 @@ struct client_debug_mask { #define ORANGEFS_CACHE_CREATE_FLAGS 0 #endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ -#define ORANGEFS_CACHE_ALLOC_FLAGS (GFP_KERNEL) #define ORANGEFS_GFP_FLAGS (GFP_KERNEL) #define ORANGEFS_BUFMAP_GFP_FLAGS (GFP_KERNEL) @@ -207,9 +206,6 @@ struct orangefs_kernel_op_s { /* VFS aio fields */ - /* used by the async I/O code to stash the orangefs_kiocb_s structure */ - void *priv; - int 
attempts; struct list_head list; @@ -217,6 +213,7 @@ struct orangefs_kernel_op_s { #define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) #define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) +#define set_op_state_given_up(op) ((op)->op_state = OP_VFS_STATE_GIVEN_UP) static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) { op->op_state = OP_VFS_STATE_SERVICED; @@ -453,19 +450,9 @@ int op_cache_finalize(void); struct orangefs_kernel_op_s *op_alloc(__s32 type); char *get_opname_string(struct orangefs_kernel_op_s *new_op); -int dev_req_cache_initialize(void); -int dev_req_cache_finalize(void); -void *dev_req_alloc(void); -void dev_req_release(void *); - int orangefs_inode_cache_initialize(void); int orangefs_inode_cache_finalize(void); -int kiocb_cache_initialize(void); -int kiocb_cache_finalize(void); -struct orangefs_kiocb_s *kiocb_alloc(void); -void kiocb_release(struct orangefs_kiocb_s *ptr); - /* * defined in orangefs-mod.c */ diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index e07874e26372..7639ab2df711 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -140,24 +140,16 @@ static int __init orangefs_init(void) if (ret < 0) goto err; - ret = dev_req_cache_initialize(); - if (ret < 0) - goto cleanup_op; - ret = orangefs_inode_cache_initialize(); if (ret < 0) - goto cleanup_req; - - ret = kiocb_cache_initialize(); - if (ret < 0) - goto cleanup_inode; + goto cleanup_op; /* Initialize the orangefsdev subsystem. 
*/ ret = orangefs_dev_init(); if (ret < 0) { gossip_err("orangefs: could not initialize device subsystem %d!\n", ret); - goto cleanup_kiocb; + goto cleanup_inode; } htable_ops_in_progress = @@ -214,15 +206,9 @@ cleanup_progress_table: cleanup_device: orangefs_dev_cleanup(); -cleanup_kiocb: - kiocb_cache_finalize(); - cleanup_inode: orangefs_inode_cache_finalize(); -cleanup_req: - dev_req_cache_finalize(); - cleanup_op: op_cache_finalize(); @@ -247,9 +233,7 @@ static void __exit orangefs_exit(void) for (i = 0; i < hash_table_size; i++) BUG_ON(!list_empty(&htable_ops_in_progress[i])); - kiocb_cache_finalize(); orangefs_inode_cache_finalize(); - dev_req_cache_finalize(); op_cache_finalize(); kfree(htable_ops_in_progress); diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index a32981239ea6..93cc352be360 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -92,8 +92,7 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) { struct orangefs_inode_s *orangefs_inode; - orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, - ORANGEFS_CACHE_ALLOC_FLAGS); + orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, GFP_KERNEL); if (orangefs_inode == NULL) { gossip_err("Failed to allocate orangefs_inode\n"); return NULL; From fe88adc3661ff9eb2a9777277f9c3abf5909449f Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 30 Jan 2016 13:46:11 -0500 Subject: [PATCH 108/174] orangefs: Only compare attributes specified in orangefs_inode_getattr. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-utils.c | 62 +++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 6cf29a439211..fa3ed8ad35be 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -355,26 +355,37 @@ static inline int copy_attributes_from_inode(struct inode *inode, static int compare_attributes_to_inode(struct inode *inode, struct ORANGEFS_sys_attr_s *attrs, - char *symname) + char *symname, + int mask) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); loff_t inode_size, rounded_up_size; + /* Much of what happens below relies on the type being around. */ + if (!(mask & ORANGEFS_ATTR_SYS_TYPE)) + return 0; + + if (attrs->objtype == ORANGEFS_TYPE_METAFILE && + inode->i_flags != orangefs_inode_flags(attrs)) + return 0; + /* Compare file size. */ switch (attrs->objtype) { case ORANGEFS_TYPE_METAFILE: - if(inode->i_flags != orangefs_inode_flags(attrs)) - return 0; - inode_size = attrs->size; - rounded_up_size = inode_size + (4096 - (inode_size % 4096)); - if (inode->i_bytes != inode_size || - inode->i_blocks != rounded_up_size/512) - return 0; + if (mask & ORANGEFS_ATTR_SYS_SIZE) { + inode_size = attrs->size; + rounded_up_size = inode_size + + (4096 - (inode_size % 4096)); + if (inode->i_bytes != inode_size || + inode->i_blocks != rounded_up_size/512) + return 0; + } break; case ORANGEFS_TYPE_SYMLINK: - if (symname && strlen(symname) != inode->i_size) - return 0; + if (mask & ORANGEFS_ATTR_SYS_SIZE) + if (symname && strlen(symname) != inode->i_size) + return 0; break; default: if (inode->i_size != PAGE_CACHE_SIZE && @@ -384,17 +395,28 @@ static int compare_attributes_to_inode(struct inode *inode, /* Compare general attributes. 
*/ - if (!uid_eq(inode->i_uid, make_kuid(&init_user_ns, attrs->owner)) || - !gid_eq(inode->i_gid, make_kgid(&init_user_ns, attrs->group)) || - inode->i_atime.tv_sec != attrs->atime || - inode->i_mtime.tv_sec != attrs->mtime || - inode->i_ctime.tv_sec != attrs->ctime || - inode->i_atime.tv_nsec != 0 || + if (mask & ORANGEFS_ATTR_SYS_UID && + !uid_eq(inode->i_uid, make_kuid(&init_user_ns, attrs->owner))) + return 0; + if (mask & ORANGEFS_ATTR_SYS_GID && + !gid_eq(inode->i_gid, make_kgid(&init_user_ns, attrs->group))) + return 0; + if (mask & ORANGEFS_ATTR_SYS_ATIME && + inode->i_atime.tv_sec != attrs->atime) + return 0; + if (mask & ORANGEFS_ATTR_SYS_MTIME && + inode->i_atime.tv_sec != attrs->mtime) + return 0; + if (mask & ORANGEFS_ATTR_SYS_CTIME && + inode->i_atime.tv_sec != attrs->ctime) + return 0; + if (inode->i_atime.tv_nsec != 0 || inode->i_mtime.tv_nsec != 0 || inode->i_ctime.tv_nsec != 0) return 0; - if ((inode->i_mode & ~(S_ISVTX|S_IFREG|S_IFDIR|S_IFLNK)) != + if (mask & ORANGEFS_ATTR_SYS_PERM && + (inode->i_mode & ~(S_ISVTX|S_IFREG|S_IFDIR|S_IFLNK)) != orangefs_inode_perms(attrs)) return 0; @@ -418,7 +440,8 @@ static int compare_attributes_to_inode(struct inode *inode, case ORANGEFS_TYPE_SYMLINK: if (!(inode->i_mode & S_IFLNK)) return 0; - if (orangefs_inode && symname) + if (orangefs_inode && symname && + mask & ORANGEFS_ATTR_SYS_LNK_TARGET) if (strcmp(orangefs_inode->link_target, symname)) return 0; break; @@ -462,7 +485,8 @@ int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask, int check) if (check) { ret = compare_attributes_to_inode(inode, &new_op->downcall.resp.getattr.attributes, - new_op->downcall.resp.getattr.link_target); + new_op->downcall.resp.getattr.link_target, + getattr_mask); if (new_op->downcall.resp.getattr.attributes.objtype == ORANGEFS_TYPE_METAFILE) { From 933287da750edefbf0f449750fd67b4fc6c10013 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 30 Jan 2016 13:46:54 -0500 Subject: [PATCH 109/174] orangefs: 
Implement inode_operations->permission(). Thus d_revalidate is not obliged to check on as much, which will eventually lead the way to hammering the filesystem servers much less. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 3 ++- fs/orangefs/inode.c | 19 +++++++++++++++++++ fs/orangefs/namei.c | 1 + fs/orangefs/orangefs-kernel.h | 2 ++ fs/orangefs/protocol.h | 9 +++++++++ fs/orangefs/symlink.c | 1 + 6 files changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index e8fb79de37c6..a6911dbbf3e5 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -119,8 +119,9 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) goto out; /* Now we must perform a getattr to validate the inode contents. */ + ret = orangefs_inode_getattr(dentry->d_inode, - ORANGEFS_ATTR_SYS_ALL_NOHINT, 1); + ORANGEFS_ATTR_SYS_TYPE|ORANGEFS_ATTR_SYS_LNK_TARGET, 1); if (ret < 0) { gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n", __FILE__, __func__, __LINE__); diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 040cd95b51c2..e9688f0b99d7 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -291,6 +291,24 @@ int orangefs_getattr(struct vfsmount *mnt, return ret; } +int orangefs_permission(struct inode *inode, int mask) +{ + int ret; + + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + + gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); + + /* Make sure the permission (and other common attrs) are up to date. 
*/ + ret = orangefs_inode_getattr(inode, + ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); + if (ret < 0) + return ret; + + return generic_permission(inode, mask); +} + /* ORANGEDS2 implementation of VFS inode operations for files */ struct inode_operations orangefs_file_inode_operations = { .get_acl = orangefs_get_acl, @@ -301,6 +319,7 @@ struct inode_operations orangefs_file_inode_operations = { .getxattr = generic_getxattr, .listxattr = orangefs_listxattr, .removexattr = generic_removexattr, + .permission = orangefs_permission, }; static int orangefs_init_iops(struct inode *inode) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 50bc45d02009..8fc55c6f58db 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -443,4 +443,5 @@ struct inode_operations orangefs_dir_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = orangefs_listxattr, + .permission = orangefs_permission, }; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index d4db96223dac..a8cde9019efe 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -493,6 +493,8 @@ int orangefs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat); +int orangefs_permission(struct inode *inode, int mask); + /* * defined in xattr.c */ diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 56dd65abb908..6ac0c60c9f5e 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -205,6 +205,15 @@ typedef __s64 ORANGEFS_offset; ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ ORANGEFS_ATTR_SYS_DIRENT_COUNT | \ ORANGEFS_ATTR_SYS_BLKSIZE) + +#define ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE \ + (ORANGEFS_ATTR_SYS_COMMON_ALL | \ + ORANGEFS_ATTR_SYS_LNK_TARGET | \ + ORANGEFS_ATTR_SYS_DFILE_COUNT | \ + ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + ORANGEFS_ATTR_SYS_DIRENT_COUNT | \ + ORANGEFS_ATTR_SYS_BLKSIZE) + #define ORANGEFS_XATTR_REPLACE 0x2 #define ORANGEFS_XATTR_CREATE 0x1 #define 
ORANGEFS_MAX_SERVER_ADDR_LEN 256 diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c index 1b3ae63463dc..2b8541a7fc43 100644 --- a/fs/orangefs/symlink.c +++ b/fs/orangefs/symlink.c @@ -28,4 +28,5 @@ struct inode_operations orangefs_symlink_inode_operations = { .getattr = orangefs_getattr, .listxattr = orangefs_listxattr, .setxattr = generic_setxattr, + .permission = orangefs_permission, }; From 237f8282c04ba81926f4dfc33cd2ca20bb0c50e7 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 3 Feb 2016 16:56:24 -0500 Subject: [PATCH 110/174] orangefs: Do not retrieve size from servers unless it is necessary. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index e9688f0b99d7..d2923dc91388 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -411,7 +411,8 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref if (!inode || !(inode->i_state & I_NEW)) return inode; - error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); + error = orangefs_inode_getattr(inode, + ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); if (error) { iget_failed(inode); return ERR_PTR(error); @@ -456,7 +457,8 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ - error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); + error = orangefs_inode_getattr(inode, + ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); if (error) goto out_iput; From 6ebcc3fcdac1f70078a02ab11f2aa5a88a4fdaee Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 4 Feb 2016 16:28:31 -0500 Subject: [PATCH 111/174] Orangefs: added a couple of WARN_ONs, perhaps just temporarily.
Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 1 + fs/orangefs/waitqueue.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 7e6fe8d8ab2b..d865b58fb1fc 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -250,6 +250,7 @@ populate_shared_memory: * put error codes in downcall so that handle_io_error() * preserves it properly */ + WARN_ON(!op_state_serviced(new_op)); new_op->downcall.status = ret; handle_io_error(); goto out; diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index cdbf57bef3eb..191d886ccc57 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -205,6 +205,7 @@ retry_servicing: /* op uses shared memory */ if (orangefs_get_bufmap_init() == 0) { + WARN_ON(1); /* * This operation uses the shared memory system AND * the system is not yet ready. This situation occurs From e17be9fd4d51302c41b17e22f9ec96751f47951b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 6 Feb 2016 14:59:38 -0500 Subject: [PATCH 112/174] orangefs: avoid freeing a slot twice in wait_for_direct_io() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index d865b58fb1fc..40b38057b826 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -210,6 +210,7 @@ populate_shared_memory: */ if (ret == -EAGAIN && op_state_purged(new_op)) { orangefs_bufmap_put(bufmap, buffer_index); + buffer_index = -1; gossip_debug(GOSSIP_FILE_DEBUG, "%s:going to repopulate_shared_memory.\n", __func__); From 7b9761af86b63baf4ce304fbdfdb87227d4bfbed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Feb 2016 01:25:06 -0500 Subject: [PATCH 113/174] orangefs: wait_for_direct_io(): restore the position in iter when restarting Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 
40b38057b826..c767ec746c76 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -133,6 +133,7 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; struct orangefs_bufmap *bufmap = NULL; struct orangefs_kernel_op_s *new_op = NULL; + struct iov_iter saved = *iter; int buffer_index = -1; ssize_t ret; @@ -211,6 +212,8 @@ populate_shared_memory: if (ret == -EAGAIN && op_state_purged(new_op)) { orangefs_bufmap_put(bufmap, buffer_index); buffer_index = -1; + if (type == ORANGEFS_IO_WRITE) + *iter = saved; gossip_debug(GOSSIP_FILE_DEBUG, "%s:going to repopulate_shared_memory.\n", __func__); From c0eae8cd77bc34b7e4c52037eeb53712f46fa05c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Feb 2016 21:28:52 -0500 Subject: [PATCH 114/174] orangefs: get rid of handle_io_error() the second caller never needs to cancel, actually Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 65 ++++++++++------------------------------------ 1 file changed, 14 insertions(+), 51 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index c767ec746c76..dafa03ef0107 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -82,46 +82,6 @@ static int postcopy_buffers(struct orangefs_bufmap *bufmap, return ret; } -/* - * handles two possible error cases, depending on context. - * - * by design, our vfs i/o errors need to be handled in one of two ways, - * depending on where the error occured. - * - * if the error happens in the waitqueue code because we either timed - * out or a signal was raised while waiting, we need to cancel the - * userspace i/o operation and free the op manually. this is done to - * avoid having the device start writing application data to our shared - * bufmap pages without us expecting it. - * - * FIXME: POSSIBLE OPTIMIZATION: - * However, if we timed out or if we got a signal AND our upcall was never - * picked off the queue (i.e. 
we were in OP_VFS_STATE_WAITING), then we don't - * need to send a cancellation upcall. The way we can handle this is - * set error_exit to 2 in such cases and 1 whenever cancellation has to be - * sent and have handle_error - * take care of this situation as well.. - * - * if a orangefs sysint level error occured and i/o has been completed, - * there is no need to cancel the operation, as the user has finished - * using the bufmap page and so there is no danger in this case. in - * this case, we wake up the device normally so that it may free the - * op, as normal. - * - * note the only reason this is a macro is because both read and write - * cases need the exact same handling code. - */ -#define handle_io_error() \ -do { \ - if (!op_state_serviced(new_op)) { \ - orangefs_cancel_op_in_progress(new_op->tag); \ - } else { \ - complete(&new_op->done); \ - } \ - orangefs_bufmap_put(bufmap, buffer_index); \ - buffer_index = -1; \ -} while (0) - /* * Post and wait for the I/O upcall to finish */ @@ -221,7 +181,17 @@ populate_shared_memory: } if (ret < 0) { - handle_io_error(); + /* + * XXX: needs to be optimized - we only need to cancel if it + * had been seen by daemon and not completed + */ + if (!op_state_serviced(new_op)) { + orangefs_cancel_op_in_progress(new_op->tag); + } else { + complete(&new_op->done); + } + orangefs_bufmap_put(bufmap, buffer_index); + buffer_index = -1; /* * don't write an error to syslog on signaled operation * termination unless we've got debugging turned on, as @@ -249,16 +219,8 @@ populate_shared_memory: buffer_index, iter, new_op->downcall.resp.io.amt_complete); - if (ret < 0) { - /* - * put error codes in downcall so that handle_io_error() - * preserves it properly - */ - WARN_ON(!op_state_serviced(new_op)); - new_op->downcall.status = ret; - handle_io_error(); - goto out; - } + if (ret < 0) + goto done_copying; } gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): Amount written as returned by the sys-io call:%d\n", @@ -268,6 +230,7 @@ 
populate_shared_memory: ret = new_op->downcall.resp.io.amt_complete; +done_copying: /* * tell the device file owner waiting on I/O that this read has * completed and it can return now. From 1357d06d49d1f87af48ab768d34af55bff18b0c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Feb 2016 21:34:52 -0500 Subject: [PATCH 115/174] get rid of bufmap argument of orangefs_bufmap_put() it's always equal to __orangefs_bufmap and the latter can't change until we are done Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 6 +++--- fs/orangefs/orangefs-bufmap.c | 3 ++- fs/orangefs/orangefs-bufmap.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index dafa03ef0107..193671c137c3 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -170,7 +170,7 @@ populate_shared_memory: * a new shared memory location. */ if (ret == -EAGAIN && op_state_purged(new_op)) { - orangefs_bufmap_put(bufmap, buffer_index); + orangefs_bufmap_put(buffer_index); buffer_index = -1; if (type == ORANGEFS_IO_WRITE) *iter = saved; @@ -190,7 +190,7 @@ populate_shared_memory: } else { complete(&new_op->done); } - orangefs_bufmap_put(bufmap, buffer_index); + orangefs_bufmap_put(buffer_index); buffer_index = -1; /* * don't write an error to syslog on signaled operation @@ -239,7 +239,7 @@ done_copying: out: if (buffer_index >= 0) { - orangefs_bufmap_put(bufmap, buffer_index); + orangefs_bufmap_put(buffer_index); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): PUT buffer_index %d\n", __func__, handle, buffer_index); diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index c60019de1fd8..1819dee58433 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -458,9 +458,10 @@ int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index) * * no return value */ -void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index) +void orangefs_bufmap_put(int 
buffer_index) { struct slot_args slargs; + struct orangefs_bufmap *bufmap = __orangefs_bufmap; slargs.slot_count = bufmap->desc_count; slargs.slot_array = bufmap->buffer_index_array; diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index dff55e2857c5..2a2d4269d03e 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -21,7 +21,7 @@ void orangefs_bufmap_finalize(void); int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index); -void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index); +void orangefs_bufmap_put(int buffer_index); int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index); From 78699e29fd784a4613d254a22627f336c55c4a76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Feb 2016 23:07:19 -0500 Subject: [PATCH 116/174] orangefs: delay freeing slot until cancel completes Make cancels reuse the aborted read/write op, to make sure they do not fail on lack of memory. Don't issue a cancel unless the daemon has seen our read/write, has not replied and isn't being shut down. If cancel *is* issued, don't wait for it to complete; stash the slot in there and just have it freed when cancel is finally replied to or purged (and delay dropping the reference until then, obviously). 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 7 ++ fs/orangefs/file.c | 16 +--- fs/orangefs/orangefs-cache.c | 16 ++-- fs/orangefs/orangefs-kernel.h | 40 ++++++++-- fs/orangefs/orangefs-mod.c | 2 - fs/orangefs/orangefs-utils.c | 32 -------- fs/orangefs/waitqueue.c | 137 +++++++++++----------------------- 7 files changed, 95 insertions(+), 155 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 37278f5878b3..6a7df1204bfc 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -438,6 +438,8 @@ wakeup: } } out: + if (unlikely(op_is_cancel(op))) + put_cancel(op); op_release(op); return ret; @@ -546,6 +548,11 @@ int is_daemon_in_service(void) return in_service; } +bool __is_daemon_in_service(void) +{ + return open_access_count == 1; +} + static inline long check_ioctl_command(unsigned int command) { /* Check for valid ioctl codes */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 193671c137c3..3b1e9e83eb91 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -181,17 +181,6 @@ populate_shared_memory: } if (ret < 0) { - /* - * XXX: needs to be optimized - we only need to cancel if it - * had been seen by daemon and not completed - */ - if (!op_state_serviced(new_op)) { - orangefs_cancel_op_in_progress(new_op->tag); - } else { - complete(&new_op->done); - } - orangefs_bufmap_put(buffer_index); - buffer_index = -1; /* * don't write an error to syslog on signaled operation * termination unless we've got debugging turned on, as @@ -207,7 +196,10 @@ populate_shared_memory: type == ORANGEFS_IO_READ ? 
"read from" : "write to", handle, ret); - goto out; + if (orangefs_cancel_op_in_progress(new_op)) + return ret; + + goto done_copying; } /* diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 3b3de91406ca..59ab0c207e90 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -101,6 +101,15 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op) return "OP_UNKNOWN?"; } +void orangefs_new_tag(struct orangefs_kernel_op_s *op) +{ + spin_lock(&next_tag_value_lock); + op->tag = next_tag_value++; + if (next_tag_value == 0) + next_tag_value = 100; + spin_unlock(&next_tag_value_lock); +} + struct orangefs_kernel_op_s *op_alloc(__s32 type) { struct orangefs_kernel_op_s *new_op = NULL; @@ -120,14 +129,9 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) new_op->downcall.status = -1; new_op->op_state = OP_VFS_STATE_UNKNOWN; - new_op->tag = 0; /* initialize the op specific tag and upcall credentials */ - spin_lock(&next_tag_value_lock); - new_op->tag = next_tag_value++; - if (next_tag_value == 0) - next_tag_value = 100; - spin_unlock(&next_tag_value_lock); + orangefs_new_tag(new_op); new_op->upcall.type = type; new_op->attempts = 0; gossip_debug(GOSSIP_CACHE_DEBUG, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index a8cde9019efe..3ceeeaed4143 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -190,9 +190,14 @@ struct orangefs_kernel_op_s { /* * Set uses_shared_memory to 1 if this operation uses shared memory. * If true, then a retry on the op must also get a new shared memory - * buffer and re-populate it. + * buffer and re-populate it. Cancels don't care - it only matters + * for service_operation() retry logics and cancels don't go through + * it anymore. 
*/ - int uses_shared_memory; + union { + int uses_shared_memory; + int slot_to_free; + }; struct orangefs_upcall_s upcall; struct orangefs_downcall_s downcall; @@ -219,17 +224,13 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) op->op_state = OP_VFS_STATE_SERVICED; wake_up_interruptible(&op->waitq); } -static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) -{ - op->op_state |= OP_VFS_STATE_PURGED; - wake_up_interruptible(&op->waitq); -} #define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) #define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) #define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) #define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) #define op_state_given_up(op) ((op)->op_state & OP_VFS_STATE_GIVEN_UP) +#define op_is_cancel(op) ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL) static inline void get_op(struct orangefs_kernel_op_s *op) { @@ -249,6 +250,27 @@ static inline void op_release(struct orangefs_kernel_op_s *op) } } +extern void orangefs_bufmap_put(int); +static inline void put_cancel(struct orangefs_kernel_op_s *op) +{ + orangefs_bufmap_put(op->slot_to_free); + op_release(op); +} + +static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) +{ + spin_lock(&op->lock); + if (unlikely(op_is_cancel(op))) { + list_del(&op->list); + spin_unlock(&op->lock); + put_cancel(op); + } else { + op->op_state |= OP_VFS_STATE_PURGED; + wake_up_interruptible(&op->waitq); + spin_unlock(&op->lock); + } +} + /* per inode private orangefs info */ struct orangefs_inode_s { struct orangefs_object_kref refn; @@ -448,6 +470,7 @@ static inline int match_handle(struct orangefs_khandle resp_handle, int op_cache_initialize(void); int op_cache_finalize(void); struct orangefs_kernel_op_s *op_alloc(__s32 type); +void orangefs_new_tag(struct orangefs_kernel_op_s *op); char *get_opname_string(struct orangefs_kernel_op_s *new_op); int 
orangefs_inode_cache_initialize(void); @@ -528,6 +551,7 @@ ssize_t orangefs_inode_read(struct inode *inode, int orangefs_dev_init(void); void orangefs_dev_cleanup(void); int is_daemon_in_service(void); +bool __is_daemon_in_service(void); int fs_mount_pending(__s32 fsid); /* @@ -562,7 +586,7 @@ void orangefs_set_signals(sigset_t *); int orangefs_unmount_sb(struct super_block *sb); -int orangefs_cancel_op_in_progress(__u64 tag); +bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); static inline __u64 orangefs_convert_time_field(const struct timespec *ts) { diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 7639ab2df711..965959cb11d1 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -260,14 +260,12 @@ void purge_inprogress_ops(void) next, &htable_ops_in_progress[i], list) { - spin_lock(&op->lock); gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2-client-core: purging in-progress op tag " "%llu %s\n", llu(op->tag), get_opname_string(op)); set_op_state_purged(op); - spin_unlock(&op->lock); } spin_unlock(&htable_ops_in_progress_lock); } diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index fa3ed8ad35be..08f9c2dab0fe 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -688,38 +688,6 @@ int orangefs_unmount_sb(struct super_block *sb) return ret; } -/* - * NOTE: on successful cancellation, be sure to return -EINTR, as - * that's the return value the caller expects - */ -int orangefs_cancel_op_in_progress(__u64 tag) -{ - int ret = -EINVAL; - struct orangefs_kernel_op_s *new_op = NULL; - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_cancel_op_in_progress called on tag %llu\n", - llu(tag)); - - new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL); - if (!new_op) - return -ENOMEM; - new_op->upcall.req.cancel.op_tag = tag; - - gossip_debug(GOSSIP_UTILS_DEBUG, - "Attempting ORANGEFS operation cancellation of tag %llu\n", - llu(new_op->upcall.req.cancel.op_tag)); - - ret = 
service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION); - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_cancel_op_in_progress: got return value of %d\n", - ret); - - op_release(new_op); - return ret; -} - void orangefs_make_bad_inode(struct inode *inode) { if (is_root_handle(inode)) { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 191d886ccc57..3ea1665efdf0 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -16,7 +16,6 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *); static int wait_for_matching_downcall(struct orangefs_kernel_op_s *); /* @@ -36,23 +35,27 @@ void purge_waiting_ops(void) "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), get_opname_string(op)); - spin_lock(&op->lock); set_op_state_purged(op); - spin_unlock(&op->lock); } spin_unlock(&orangefs_request_list_lock); } +static inline void +__add_op_to_request_list(struct orangefs_kernel_op_s *op) +{ + spin_lock(&op->lock); + set_op_state_waiting(op); + list_add_tail(&op->list, &orangefs_request_list); + spin_unlock(&op->lock); + wake_up_interruptible(&orangefs_request_list_waitq); +} + static inline void add_op_to_request_list(struct orangefs_kernel_op_s *op) { spin_lock(&orangefs_request_list_lock); - spin_lock(&op->lock); - set_op_state_waiting(op); - list_add_tail(&op->list, &orangefs_request_list); + __add_op_to_request_list(op); spin_unlock(&orangefs_request_list_lock); - spin_unlock(&op->lock); - wake_up_interruptible(&orangefs_request_list_waitq); } static inline @@ -159,15 +162,7 @@ retry_servicing: if (flags & ORANGEFS_OP_ASYNC) return 0; - if (flags & ORANGEFS_OP_CANCELLATION) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:" - "About to call wait_for_cancellation_downcall.\n", - __func__); - ret = wait_for_cancellation_downcall(op); - } else { - ret = wait_for_matching_downcall(op); - } + ret = wait_for_matching_downcall(op); if (ret < 0) { /* 
failed to get matching downcall */ @@ -273,6 +268,36 @@ retry_servicing: return ret; } +bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) +{ + u64 tag = op->tag; + if (!op_state_in_progress(op)) + return false; + + op->slot_to_free = op->upcall.req.io.buf_index; + memset(&op->upcall, 0, sizeof(op->upcall)); + memset(&op->downcall, 0, sizeof(op->downcall)); + op->upcall.type = ORANGEFS_VFS_OP_CANCEL; + op->upcall.req.cancel.op_tag = tag; + op->downcall.type = ORANGEFS_VFS_OP_INVALID; + op->downcall.status = -1; + orangefs_new_tag(op); + + spin_lock(&orangefs_request_list_lock); + /* orangefs_request_list_lock is enough of a barrier here */ + if (!__is_daemon_in_service()) { + spin_unlock(&orangefs_request_list_lock); + return false; + } + __add_op_to_request_list(op); + spin_unlock(&orangefs_request_list_lock); + + gossip_debug(GOSSIP_UTILS_DEBUG, + "Attempting ORANGEFS operation cancellation of tag %llu\n", + llu(tag)); + return true; +} + static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* @@ -426,81 +451,3 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) return ret; } - -/* - * similar to wait_for_matching_downcall(), but used in the special case - * of I/O cancellations. - * - * Note we need a special wait function because if this is called we already - * know that a signal is pending in current and need to service the - * cancellation upcall anyway. the only way to exit this is to either - * timeout or have the cancellation be serviced properly. 
- */ -static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) -{ - int ret = -EINVAL; - DEFINE_WAIT(wait_entry); - - while (1) { - spin_lock(&op->lock); - prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); - if (op_state_serviced(op)) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:op-state is SERVICED.\n", - __func__); - spin_unlock(&op->lock); - ret = 0; - break; - } - - if (signal_pending(current)) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:operation interrupted by a signal (tag" - " %llu, op %p)\n", - __func__, - llu(op->tag), - op); - orangefs_clean_up_interrupted_operation(op); - ret = -EINTR; - break; - } - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:About to call schedule_timeout.\n", - __func__); - spin_unlock(&op->lock); - ret = schedule_timeout(op_timeout_secs * HZ); - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:Value returned from schedule_timeout(%d).\n", - __func__, - ret); - if (!ret) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:*** operation timed out: %p\n", - __func__, - op); - spin_lock(&op->lock); - orangefs_clean_up_interrupted_operation(op); - ret = -ETIMEDOUT; - break; - } - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n", - __func__); - ret = -ETIMEDOUT; - break; - } - - spin_lock(&op->lock); - finish_wait(&op->waitq, &wait_entry); - spin_unlock(&op->lock); - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:returning ret(%d)\n", - __func__, - ret); - - return ret; -} From cf22644a0e5f1a66c61e90da15784effe3ba7ced Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Fri, 5 Feb 2016 16:37:00 -0500 Subject: [PATCH 117/174] orangefs: use S_ISREG(mode) and friends instead of mode & S_IFREG. Suggestion from Dan Carpenter. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 08f9c2dab0fe..63e8c9bc912e 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -428,17 +428,17 @@ static int compare_attributes_to_inode(struct inode *inode, switch (attrs->objtype) { case ORANGEFS_TYPE_METAFILE: - if (!(inode->i_mode & S_IFREG)) + if (!S_ISREG(inode->i_mode)) return 0; break; case ORANGEFS_TYPE_DIRECTORY: - if (!(inode->i_mode & S_IFDIR)) + if (!S_ISDIR(inode->i_mode)) return 0; if (inode->i_nlink != 1) return 0; break; case ORANGEFS_TYPE_SYMLINK: - if (!(inode->i_mode & S_IFLNK)) + if (!S_ISLNK(inode->i_mode)) return 0; if (orangefs_inode && symname && mask & ORANGEFS_ATTR_SYS_LNK_TARGET) From d2d87a3b6df3088a991e277d42cd6a549ff2bc66 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 10:15:22 -0500 Subject: [PATCH 118/174] orangefs: get rid of loop in wait_for_matching_downcall() turn op->waitq into struct completion... 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-cache.c | 2 +- fs/orangefs/orangefs-kernel.h | 6 +- fs/orangefs/waitqueue.c | 131 ++++++++++++---------------------- 3 files changed, 50 insertions(+), 89 deletions(-) diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 59ab0c207e90..09194e69875f 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -118,7 +118,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) if (new_op) { INIT_LIST_HEAD(&new_op->list); spin_lock_init(&new_op->lock); - init_waitqueue_head(&new_op->waitq); + init_completion(&new_op->waitq); atomic_set(&new_op->ref_count, 1); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 3ceeeaed4143..de898bda7859 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -202,7 +202,7 @@ struct orangefs_kernel_op_s { struct orangefs_upcall_s upcall; struct orangefs_downcall_s downcall; - wait_queue_head_t waitq; + struct completion waitq; spinlock_t lock; struct completion done; @@ -222,7 +222,7 @@ struct orangefs_kernel_op_s { static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) { op->op_state = OP_VFS_STATE_SERVICED; - wake_up_interruptible(&op->waitq); + complete(&op->waitq); } #define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) @@ -266,7 +266,7 @@ static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) put_cancel(op); } else { op->op_state |= OP_VFS_STATE_PURGED; - wake_up_interruptible(&op->waitq); + complete(&op->waitq); spin_unlock(&op->lock); } } diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 3ea1665efdf0..89622717a06d 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -17,6 +17,7 @@ #include "orangefs-bufmap.h" static int wait_for_matching_downcall(struct orangefs_kernel_op_s *); +static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *); /* * What 
we do in this function is to walk the list of operations that are @@ -170,8 +171,10 @@ retry_servicing: gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n", op_name); } + orangefs_clean_up_interrupted_operation(op); op->downcall.status = ret; } else { + spin_unlock(&op->lock); /* got matching downcall; make sure status is in errno format */ op->downcall.status = orangefs_normalize_to_errno(op->downcall.status); @@ -343,6 +346,7 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s gossip_err("%s: can't get here.\n", __func__); spin_unlock(&op->lock); } + reinit_completion(&op->waitq); } /* @@ -359,95 +363,52 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this * operation since client-core seems to be exiting too often * or if we were interrupted. + * + * Returns with op->lock taken. */ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) { - int ret = -EINVAL; - DEFINE_WAIT(wait_entry); - - while (1) { - spin_lock(&op->lock); - prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); - if (op_state_serviced(op)) { - spin_unlock(&op->lock); - ret = 0; - break; - } - - if (unlikely(signal_pending(current))) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation interrupted by a signal (tag " - "%llu, op %p)\n", - __func__, - llu(op->tag), - op); - orangefs_clean_up_interrupted_operation(op); - ret = -EINTR; - break; - } - - /* - * if this was our first attempt and client-core - * has not purged our operation, we are happy to - * simply wait - */ - if (op->attempts == 0 && !op_state_purged(op)) { - spin_unlock(&op->lock); - schedule(); - } else { - spin_unlock(&op->lock); - /* - * subsequent attempts, we retry exactly once - * with timeouts - */ - if (!schedule_timeout(op_timeout_secs * HZ)) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation timed out (tag" - " %llu, %p, att %d)\n", - 
__func__, - llu(op->tag), - op, - op->attempts); - ret = -ETIMEDOUT; - spin_lock(&op->lock); - orangefs_clean_up_interrupted_operation(op); - break; - } - } - spin_lock(&op->lock); - op->attempts++; - /* - * if the operation was purged in the meantime, it - * is better to requeue it afresh but ensure that - * we have not been purged repeatedly. This could - * happen if client-core crashes when an op - * is being serviced, so we requeue the op, client - * core crashes again so we requeue the op, client - * core starts, and so on... - */ - if (op_state_purged(op)) { - ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? - -EAGAIN : - -EIO; - gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation purged (tag " - "%llu, %p, att %d)\n", - __func__, - llu(op->tag), - op, - op->attempts); - orangefs_clean_up_interrupted_operation(op); - break; - } - spin_unlock(&op->lock); - } + long timeout, n; + timeout = op->attempts ? op_timeout_secs * HZ : MAX_SCHEDULE_TIMEOUT; + n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); spin_lock(&op->lock); - finish_wait(&op->waitq, &wait_entry); - spin_unlock(&op->lock); - return ret; + if (op_state_serviced(op)) + return 0; + + if (unlikely(n < 0)) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation interrupted by a signal (tag " + "%llu, op %p)\n", + __func__, + llu(op->tag), + op); + return -EINTR; + } + op->attempts++; + if (op_state_purged(op)) { + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation purged (tag " + "%llu, %p, att %d)\n", + __func__, + llu(op->tag), + op, + op->attempts); + return (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? + -EAGAIN : + -EIO; + } + /* must have timed out, then... 
*/ + gossip_debug(GOSSIP_WAIT_DEBUG, + "*** %s:" + " operation timed out (tag" + " %llu, %p, att %d)\n", + __func__, + llu(op->tag), + op, + op->attempts); + return -ETIMEDOUT; } From 98815ade9eaca3c4729710129a651aa0b43d007a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 10:38:23 -0500 Subject: [PATCH 119/174] orangefs: sanitize handling of request list * checking that daemon is running (to decide whether we want to limit the timeout) should be done *after* the damn thing is included into the list; doing that before means that if the daemon gets shut down in between, we'll end up waiting indefinitely (== up to kill -9). * cancels should go into the head of the queue - the sooner they are picked, the less work daemon has to do and the sooner we get to free the slot held by aborted operation. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 68 +++++++++++------------------------------ 1 file changed, 18 insertions(+), 50 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 89622717a06d..6cae77400a5b 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -41,37 +41,6 @@ void purge_waiting_ops(void) spin_unlock(&orangefs_request_list_lock); } -static inline void -__add_op_to_request_list(struct orangefs_kernel_op_s *op) -{ - spin_lock(&op->lock); - set_op_state_waiting(op); - list_add_tail(&op->list, &orangefs_request_list); - spin_unlock(&op->lock); - wake_up_interruptible(&orangefs_request_list_waitq); -} - -static inline void -add_op_to_request_list(struct orangefs_kernel_op_s *op) -{ - spin_lock(&orangefs_request_list_lock); - __add_op_to_request_list(op); - spin_unlock(&orangefs_request_list_lock); -} - -static inline -void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op) -{ - spin_lock(&orangefs_request_list_lock); - spin_lock(&op->lock); - set_op_state_waiting(op); - - list_add(&op->list, &orangefs_request_list); - spin_unlock(&orangefs_request_list_lock); 
- spin_unlock(&op->lock); - wake_up_interruptible(&orangefs_request_list_waitq); -} - /* * submits a ORANGEFS operation and waits for it to complete * @@ -126,32 +95,28 @@ retry_servicing: } } - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:About to call is_daemon_in_service().\n", - __func__); - - if (is_daemon_in_service() < 0) { + /* queue up the operation */ + spin_lock(&orangefs_request_list_lock); + spin_lock(&op->lock); + set_op_state_waiting(op); + if (flags & ORANGEFS_OP_PRIORITY) + list_add(&op->list, &orangefs_request_list); + else + list_add_tail(&op->list, &orangefs_request_list); + spin_unlock(&op->lock); + wake_up_interruptible(&orangefs_request_list_waitq); + if (!__is_daemon_in_service()) { /* * By incrementing the per-operation attempt counter, we * directly go into the timeout logic while waiting for * the matching downcall to be read */ gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:client core is NOT in service(%d).\n", - __func__, - is_daemon_in_service()); + "%s:client core is NOT in service.\n", + __func__); op->attempts++; } - - /* queue up the operation */ - if (flags & ORANGEFS_OP_PRIORITY) { - add_priority_op_to_request_list(op); - } else { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:About to call add_op_to_request_list().\n", - __func__); - add_op_to_request_list(op); - } + spin_unlock(&orangefs_request_list_lock); if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) mutex_unlock(&request_mutex); @@ -292,7 +257,10 @@ bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) spin_unlock(&orangefs_request_list_lock); return false; } - __add_op_to_request_list(op); + spin_lock(&op->lock); + set_op_state_waiting(op); + list_add(&op->list, &orangefs_request_list); + spin_unlock(&op->lock); spin_unlock(&orangefs_request_list_lock); gossip_debug(GOSSIP_UTILS_DEBUG, From c72f15b7d9b3cc744f066776dd0e61e6ab25e7d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 10:49:24 -0500 Subject: [PATCH 120/174] service_operation(): don't block signals, just use 
..._killable Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 4 ---- fs/orangefs/orangefs-utils.c | 21 --------------------- fs/orangefs/waitqueue.c | 29 ++++++++++++++--------------- 3 files changed, 14 insertions(+), 40 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index de898bda7859..8613d4166d0f 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -580,10 +580,6 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); void orangefs_make_bad_inode(struct inode *inode); -void orangefs_block_signals(sigset_t *); - -void orangefs_set_signals(sigset_t *); - int orangefs_unmount_sb(struct super_block *sb); bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 63e8c9bc912e..488f3501b09c 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -707,27 +707,6 @@ void orangefs_make_bad_inode(struct inode *inode) } } -/* Block all blockable signals... */ -void orangefs_block_signals(sigset_t *orig_sigset) -{ - sigset_t mask; - - /* - * Initialize all entries in the signal set to the - * inverse of the given mask. - */ - siginitsetinv(&mask, sigmask(SIGKILL)); - - /* Block 'em Danno... */ - sigprocmask(SIG_BLOCK, &mask, orig_sigset); -} - -/* set the signal mask to the given template... */ -void orangefs_set_signals(sigset_t *sigset) -{ - sigprocmask(SIG_SETMASK, sigset, NULL); -} - /* * The following is a very dirty hack that is now a permanent part of the * ORANGEFS protocol. See protocol.h for more error definitions. 
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 6cae77400a5b..86b4b1fc0b14 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -16,7 +16,7 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -static int wait_for_matching_downcall(struct orangefs_kernel_op_s *); +static int wait_for_matching_downcall(struct orangefs_kernel_op_s *, bool); static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *); /* @@ -56,7 +56,6 @@ int service_operation(struct orangefs_kernel_op_s *op, int flags) { /* flags to modify behavior */ - sigset_t orig_sigset; int ret = 0; DEFINE_WAIT(wait_entry); @@ -75,19 +74,16 @@ retry_servicing: current->comm, current->pid); - /* mask out signals if this operation is not to be interrupted */ - if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) - orangefs_block_signals(&orig_sigset); - if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) { - ret = mutex_lock_interruptible(&request_mutex); + if (flags & ORANGEFS_OP_INTERRUPTIBLE) + ret = mutex_lock_interruptible(&request_mutex); + else + ret = mutex_lock_killable(&request_mutex); /* * check to see if we were interrupted while waiting for * semaphore */ if (ret < 0) { - if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) - orangefs_set_signals(&orig_sigset); op->downcall.status = ret; gossip_debug(GOSSIP_WAIT_DEBUG, "orangefs: service_operation interrupted.\n"); @@ -128,7 +124,7 @@ retry_servicing: if (flags & ORANGEFS_OP_ASYNC) return 0; - ret = wait_for_matching_downcall(op); + ret = wait_for_matching_downcall(op, flags & ORANGEFS_OP_INTERRUPTIBLE); if (ret < 0) { /* failed to get matching downcall */ @@ -146,9 +142,6 @@ retry_servicing: ret = op->downcall.status; } - if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) - orangefs_set_signals(&orig_sigset); - BUG_ON(ret != op->downcall.status); /* retry if operation has not been serviced and if requested */ if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) { @@ -334,12 +327,18 @@ static void 
orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * * Returns with op->lock taken. */ -static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) +static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, + bool interruptible) { long timeout, n; timeout = op->attempts ? op_timeout_secs * HZ : MAX_SCHEDULE_TIMEOUT; - n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); + + if (interruptible) + n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); + else + n = wait_for_completion_killable_timeout(&op->waitq, timeout); + spin_lock(&op->lock); if (op_state_serviced(op)) From 05b39a8b5cecaaf356497ee7df2f8acbc59eb2ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 11:04:19 -0500 Subject: [PATCH 121/174] orangefs: lift handling of timeouts and attempts count to service_operation() Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 46 +++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 86b4b1fc0b14..378cdcf43252 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -16,7 +16,7 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -static int wait_for_matching_downcall(struct orangefs_kernel_op_s *, bool); +static int wait_for_matching_downcall(struct orangefs_kernel_op_s *, long, bool); static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *); /* @@ -55,6 +55,7 @@ int service_operation(struct orangefs_kernel_op_s *op, const char *op_name, int flags) { + long timeout = MAX_SCHEDULE_TIMEOUT; /* flags to modify behavior */ int ret = 0; @@ -102,15 +103,10 @@ retry_servicing: spin_unlock(&op->lock); wake_up_interruptible(&orangefs_request_list_waitq); if (!__is_daemon_in_service()) { - /* - * By incrementing the per-operation attempt counter, we - * directly go into the timeout logic while waiting for - * 
the matching downcall to be read - */ gossip_debug(GOSSIP_WAIT_DEBUG, "%s:client core is NOT in service.\n", __func__); - op->attempts++; + timeout = op_timeout_secs * HZ; } spin_unlock(&orangefs_request_list_lock); @@ -124,33 +120,34 @@ retry_servicing: if (flags & ORANGEFS_OP_ASYNC) return 0; - ret = wait_for_matching_downcall(op, flags & ORANGEFS_OP_INTERRUPTIBLE); - - if (ret < 0) { - /* failed to get matching downcall */ - if (ret == -ETIMEDOUT) { - gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n", - op_name); - } - orangefs_clean_up_interrupted_operation(op); - op->downcall.status = ret; - } else { + ret = wait_for_matching_downcall(op, timeout, + flags & ORANGEFS_OP_INTERRUPTIBLE); + if (!ret) { spin_unlock(&op->lock); /* got matching downcall; make sure status is in errno format */ op->downcall.status = orangefs_normalize_to_errno(op->downcall.status); ret = op->downcall.status; + goto out; } - BUG_ON(ret != op->downcall.status); + /* failed to get matching downcall */ + if (ret == -ETIMEDOUT) { + gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n", + op_name); + } + orangefs_clean_up_interrupted_operation(op); + op->downcall.status = ret; /* retry if operation has not been serviced and if requested */ - if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) { + if (ret == -EAGAIN) { + op->attempts++; + timeout = op_timeout_secs * HZ; gossip_debug(GOSSIP_WAIT_DEBUG, "orangefs: tag %llu (%s)" " -- operation to be retried (%d attempt)\n", llu(op->tag), op_name, - op->attempts + 1); + op->attempts); if (!op->uses_shared_memory) /* @@ -221,6 +218,7 @@ retry_servicing: } } +out: gossip_debug(GOSSIP_WAIT_DEBUG, "orangefs: service_operation %s returning: %d for %p.\n", op_name, @@ -328,11 +326,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * Returns with op->lock taken. 
*/ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, + long timeout, bool interruptible) { - long timeout, n; - - timeout = op->attempts ? op_timeout_secs * HZ : MAX_SCHEDULE_TIMEOUT; + long n; if (interruptible) n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); @@ -354,7 +351,6 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, op); return -EINTR; } - op->attempts++; if (op_state_purged(op)) { gossip_debug(GOSSIP_WAIT_DEBUG, "*** %s:" From 178041848a6e7072cc6ebc1c6c7763e33f564722 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 11:16:37 -0500 Subject: [PATCH 122/174] orangefs_bufmap_..._query(): don't bother with refcounts ... just hold the spinlock while fetching the field in question. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-bufmap.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 1819dee58433..cd484665bf72 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -91,11 +91,11 @@ int orangefs_bufmap_size_query(void) { struct orangefs_bufmap *bufmap; int size = 0; - bufmap = orangefs_bufmap_ref(); - if (bufmap) { + spin_lock(&orangefs_bufmap_lock); + bufmap = __orangefs_bufmap; + if (bufmap) size = bufmap->desc_size; - orangefs_bufmap_unref(bufmap); - } + spin_unlock(&orangefs_bufmap_lock); return size; } @@ -103,11 +103,11 @@ int orangefs_bufmap_shift_query(void) { struct orangefs_bufmap *bufmap; int shift = 0; - bufmap = orangefs_bufmap_ref(); - if (bufmap) { + spin_lock(&orangefs_bufmap_lock); + bufmap = __orangefs_bufmap; + if (bufmap) shift = bufmap->desc_shift; - orangefs_bufmap_unref(bufmap); - } + spin_unlock(&orangefs_bufmap_lock); return shift; } From ea2c9c9f6574e835cbc903c94b82b5a34a334866 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 21:01:21 -0500 Subject: [PATCH 123/174] orangefs: bufmap rewrite 
new waiting-for-slot logics: * make request for slot wait for bufmap to be set up if it comes before it's installed *OR* while it's running down * make closing control device wait for all slots to be freed * waiting itself rewritten to (open-coded) analogues of wait_event_... primitives - we would need wait_event_locked() and, pardon an obscenely long name, wait_event_interruptible_exclusive_timeout_locked(). * we never wait for more than slot_timeout_secs in total and, if during the wait the daemon goes away, we only allow ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS for it to come back. * (cosmetical) bitmap is used instead of an array of zeroes and ones * old (and only reached if we are about to corrupt memory) waiting for daemon restart in service_operation() removed. [Martin's fixes folded] Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 15 +- fs/orangefs/orangefs-bufmap.c | 341 +++++++++++++++++----------------- fs/orangefs/orangefs-bufmap.h | 4 +- fs/orangefs/waitqueue.c | 61 ------ 4 files changed, 174 insertions(+), 247 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 6a7df1204bfc..790855a72e32 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -508,8 +508,7 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) __func__); mutex_lock(&devreq_mutex); - if (orangefs_get_bufmap_init()) - orangefs_bufmap_finalize(); + orangefs_bufmap_finalize(); open_access_count = -1; @@ -527,6 +526,9 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) * them as purged and wake them up */ purge_inprogress_ops(); + + orangefs_bufmap_run_down(); + gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: device close complete\n"); open_access_count = 0; @@ -607,13 +609,8 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) (struct ORANGEFS_dev_map_desc __user *) arg, sizeof(struct ORANGEFS_dev_map_desc)); - 
if (orangefs_get_bufmap_init()) { - return -EINVAL; - } else { - return ret ? - -EIO : - orangefs_bufmap_initialize(&user_desc); - } + /* WTF -EIO and not -EFAULT? */ + return ret ? -EIO : orangefs_bufmap_initialize(&user_desc); case ORANGEFS_DEV_REMOUNT_ALL: gossip_debug(GOSSIP_DEV_DEBUG, "%s: got ORANGEFS_DEV_REMOUNT_ALL\n", diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index cd484665bf72..96faf4ee6529 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -7,7 +7,133 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq); +struct slot_map { + int c; + wait_queue_head_t q; + int count; + unsigned long *map; +}; + +static struct slot_map rw_map = { + .c = -1, + .q = __WAIT_QUEUE_HEAD_INITIALIZER(rw_map.q) +}; +static struct slot_map readdir_map = { + .c = -1, + .q = __WAIT_QUEUE_HEAD_INITIALIZER(readdir_map.q) +}; + + +static void install(struct slot_map *m, int count, unsigned long *map) +{ + spin_lock(&m->q.lock); + m->c = m->count = count; + m->map = map; + wake_up_all_locked(&m->q); + spin_unlock(&m->q.lock); +} + +static void mark_killed(struct slot_map *m) +{ + spin_lock(&m->q.lock); + m->c -= m->count + 1; + spin_unlock(&m->q.lock); +} + +static void run_down(struct slot_map *m) +{ + DEFINE_WAIT(wait); + spin_lock(&m->q.lock); + if (m->c != -1) { + for (;;) { + if (likely(list_empty(&wait.task_list))) + __add_wait_queue_tail(&m->q, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + + if (m->c == -1) + break; + + spin_unlock(&m->q.lock); + schedule(); + spin_lock(&m->q.lock); + } + __remove_wait_queue(&m->q, &wait); + __set_current_state(TASK_RUNNING); + } + m->map = NULL; + spin_unlock(&m->q.lock); +} + +static void put(struct slot_map *m, int slot) +{ + int v; + spin_lock(&m->q.lock); + __clear_bit(slot, m->map); + v = ++m->c; + if (unlikely(v == 1)) /* no free slots -> one free slot */ + wake_up_locked(&m->q); + else if (unlikely(v == -1)) 
/* finished dying */ + wake_up_all_locked(&m->q); + spin_unlock(&m->q.lock); +} + +static int wait_for_free(struct slot_map *m) +{ + long left = slot_timeout_secs * HZ; + DEFINE_WAIT(wait); + + do { + long n = left, t; + if (likely(list_empty(&wait.task_list))) + __add_wait_queue_tail_exclusive(&m->q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + if (m->c > 0) + break; + + if (m->c < 0) { + /* we are waiting for map to be installed */ + /* it would better be there soon, or we go away */ + if (n > ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ) + n = ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ; + } + spin_unlock(&m->q.lock); + t = schedule_timeout(n); + spin_lock(&m->q.lock); + if (unlikely(!t) && n != left && m->c < 0) + left = t; + else + left = t + (left - n); + if (unlikely(signal_pending(current))) + left = -EINTR; + } while (left > 0); + + if (!list_empty(&wait.task_list)) + list_del(&wait.task_list); + else if (left <= 0 && waitqueue_active(&m->q)) + __wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL); + __set_current_state(TASK_RUNNING); + + if (likely(left > 0)) + return 0; + + return left < 0 ? 
-EINTR : -ETIMEDOUT; +} + +static int get(struct slot_map *m) +{ + int res = 0; + spin_lock(&m->q.lock); + if (unlikely(m->c <= 0)) + res = wait_for_free(m); + if (likely(!res)) { + m->c--; + res = find_first_zero_bit(m->map, m->count); + __set_bit(res, m->map); + } + spin_unlock(&m->q.lock); + return res; +} /* used to describe mapped buffers */ struct orangefs_bufmap_desc { @@ -18,8 +144,6 @@ struct orangefs_bufmap_desc { }; static struct orangefs_bufmap { - atomic_t refcnt; - int desc_size; int desc_shift; int desc_count; @@ -30,12 +154,12 @@ static struct orangefs_bufmap { struct orangefs_bufmap_desc *desc_array; /* array to track usage of buffer descriptors */ - int *buffer_index_array; - spinlock_t buffer_index_lock; + unsigned long *buffer_index_array; /* array to track usage of buffer descriptors for readdir */ - int readdir_index_array[ORANGEFS_READDIR_DEFAULT_DESC_COUNT]; - spinlock_t readdir_index_lock; +#define N DIV_ROUND_UP(ORANGEFS_READDIR_DEFAULT_DESC_COUNT, BITS_PER_LONG) + unsigned long readdir_index_array[N]; +#undef N } *__orangefs_bufmap; static DEFINE_SPINLOCK(orangefs_bufmap_lock); @@ -58,30 +182,6 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) kfree(bufmap); } -static struct orangefs_bufmap *orangefs_bufmap_ref(void) -{ - struct orangefs_bufmap *bufmap = NULL; - - spin_lock(&orangefs_bufmap_lock); - if (__orangefs_bufmap) { - bufmap = __orangefs_bufmap; - atomic_inc(&bufmap->refcnt); - } - spin_unlock(&orangefs_bufmap_lock); - return bufmap; -} - -static void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap) -{ - if (atomic_dec_and_lock(&bufmap->refcnt, &orangefs_bufmap_lock)) { - __orangefs_bufmap = NULL; - spin_unlock(&orangefs_bufmap_lock); - - orangefs_bufmap_unmap(bufmap); - orangefs_bufmap_free(bufmap); - } -} - /* * XXX: Can the size and shift change while the caller gives up the * XXX: lock between calling this and doing something useful? 
@@ -137,21 +237,18 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) if (!bufmap) goto out; - atomic_set(&bufmap->refcnt, 1); bufmap->total_size = user_desc->total_size; bufmap->desc_count = user_desc->count; bufmap->desc_size = user_desc->size; bufmap->desc_shift = ilog2(bufmap->desc_size); - spin_lock_init(&bufmap->buffer_index_lock); bufmap->buffer_index_array = - kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL); + kcalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), sizeof(unsigned long), GFP_KERNEL); if (!bufmap->buffer_index_array) { gossip_err("orangefs: could not allocate %d buffer indices\n", bufmap->desc_count); goto out_free_bufmap; } - spin_lock_init(&bufmap->readdir_index_lock); bufmap->desc_array = kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc), @@ -294,24 +391,18 @@ int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc) if (__orangefs_bufmap) { spin_unlock(&orangefs_bufmap_lock); gossip_err("orangefs: error: bufmap already initialized.\n"); - ret = -EALREADY; + ret = -EINVAL; goto out_unmap_bufmap; } __orangefs_bufmap = bufmap; + install(&rw_map, + bufmap->desc_count, + bufmap->buffer_index_array); + install(&readdir_map, + ORANGEFS_READDIR_DEFAULT_DESC_COUNT, + bufmap->readdir_index_array); spin_unlock(&orangefs_bufmap_lock); - /* - * If there are operations in orangefs_bufmap_init_waitq, wake them up. - * This scenario occurs when the client-core is restarted and I/O - * requests in the in-progress or waiting tables are restarted. I/O - * requests cannot be restarted until the shared memory system is - * completely re-initialized, so we put the I/O requests in this - * waitq until initialization has completed. NOTE: the I/O requests - * are also on a timer, so they don't wait forever just in case the - * client-core doesn't come back up. 
- */ - wake_up_interruptible(&orangefs_bufmap_init_waitq); - gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_initialize: exiting normally\n"); return 0; @@ -334,91 +425,28 @@ out: */ void orangefs_bufmap_finalize(void) { + struct orangefs_bufmap *bufmap = __orangefs_bufmap; + if (!bufmap) + return; gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n"); - BUG_ON(!__orangefs_bufmap); - orangefs_bufmap_unref(__orangefs_bufmap); + mark_killed(&rw_map); + mark_killed(&readdir_map); gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: exiting normally\n"); } -struct slot_args { - int slot_count; - int *slot_array; - spinlock_t *slot_lock; - wait_queue_head_t *slot_wq; -}; - -static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index) +void orangefs_bufmap_run_down(void) { - int ret = -1; - int i = 0; - DEFINE_WAIT(wait_entry); - - while (1) { - /* - * check for available desc, slot_lock is the appropriate - * index_lock - */ - spin_lock(slargs->slot_lock); - prepare_to_wait_exclusive(slargs->slot_wq, - &wait_entry, - TASK_INTERRUPTIBLE); - for (i = 0; i < slargs->slot_count; i++) - if (slargs->slot_array[i] == 0) { - slargs->slot_array[i] = 1; - *buffer_index = i; - ret = 0; - break; - } - spin_unlock(slargs->slot_lock); - - /* if we acquired a buffer, then break out of while */ - if (ret == 0) - break; - - if (!signal_pending(current)) { - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "[BUFMAP]: waiting %d " - "seconds for a slot\n", - slot_timeout_secs); - if (!schedule_timeout(slot_timeout_secs * HZ)) { - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "*** wait_for_a_slot timed out\n"); - ret = -ETIMEDOUT; - break; - } - gossip_debug(GOSSIP_BUFMAP_DEBUG, - "[BUFMAP]: woken up by a slot becoming available.\n"); - continue; - } - - gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs: %s interrupted.\n", - __func__); - ret = -EINTR; - break; - } - - spin_lock(slargs->slot_lock); - finish_wait(slargs->slot_wq, &wait_entry); - spin_unlock(slargs->slot_lock); - 
return ret; -} - -static void put_back_slot(struct slot_args *slargs, int buffer_index) -{ - /* slot_lock is the appropriate index_lock */ - spin_lock(slargs->slot_lock); - if (buffer_index < 0 || buffer_index >= slargs->slot_count) { - spin_unlock(slargs->slot_lock); + struct orangefs_bufmap *bufmap = __orangefs_bufmap; + if (!bufmap) return; - } - - /* put the desc back on the queue */ - slargs->slot_array[buffer_index] = 0; - spin_unlock(slargs->slot_lock); - - /* wake up anyone who may be sleeping on the queue */ - wake_up_interruptible(slargs->slot_wq); + run_down(&rw_map); + run_down(&readdir_map); + spin_lock(&orangefs_bufmap_lock); + __orangefs_bufmap = NULL; + spin_unlock(&orangefs_bufmap_lock); + orangefs_bufmap_unmap(bufmap); + orangefs_bufmap_free(bufmap); } /* @@ -431,23 +459,12 @@ static void put_back_slot(struct slot_args *slargs, int buffer_index) */ int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index) { - struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); - struct slot_args slargs; - int ret; - - if (!bufmap) { - gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n"); - return -EIO; + int ret = get(&rw_map); + if (ret >= 0) { + *mapp = __orangefs_bufmap; + *buffer_index = ret; + ret = 0; } - - slargs.slot_count = bufmap->desc_count; - slargs.slot_array = bufmap->buffer_index_array; - slargs.slot_lock = &bufmap->buffer_index_lock; - slargs.slot_wq = &bufmap_waitq; - ret = wait_for_a_slot(&slargs, buffer_index); - if (ret) - orangefs_bufmap_unref(bufmap); - *mapp = bufmap; return ret; } @@ -460,15 +477,7 @@ int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index) */ void orangefs_bufmap_put(int buffer_index) { - struct slot_args slargs; - struct orangefs_bufmap *bufmap = __orangefs_bufmap; - - slargs.slot_count = bufmap->desc_count; - slargs.slot_array = bufmap->buffer_index_array; - slargs.slot_lock = &bufmap->buffer_index_lock; - slargs.slot_wq = &bufmap_waitq; - 
put_back_slot(&slargs, buffer_index); - orangefs_bufmap_unref(bufmap); + put(&rw_map, buffer_index); } /* @@ -484,36 +493,18 @@ void orangefs_bufmap_put(int buffer_index) */ int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) { - struct orangefs_bufmap *bufmap = orangefs_bufmap_ref(); - struct slot_args slargs; - int ret; - - if (!bufmap) { - gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n"); - return -EIO; + int ret = get(&readdir_map); + if (ret >= 0) { + *mapp = __orangefs_bufmap; + *buffer_index = ret; + ret = 0; } - - slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT; - slargs.slot_array = bufmap->readdir_index_array; - slargs.slot_lock = &bufmap->readdir_index_lock; - slargs.slot_wq = &readdir_waitq; - ret = wait_for_a_slot(&slargs, buffer_index); - if (ret) - orangefs_bufmap_unref(bufmap); - *mapp = bufmap; return ret; } void orangefs_readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index) { - struct slot_args slargs; - - slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT; - slargs.slot_array = bufmap->readdir_index_array; - slargs.slot_lock = &bufmap->readdir_index_lock; - slargs.slot_wq = &readdir_waitq; - put_back_slot(&slargs, buffer_index); - orangefs_bufmap_unref(bufmap); + put(&readdir_map, buffer_index); } /* diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index 2a2d4269d03e..f0684f0085d1 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -15,10 +15,10 @@ int orangefs_bufmap_shift_query(void); int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc); -int orangefs_get_bufmap_init(void); - void orangefs_bufmap_finalize(void); +void orangefs_bufmap_run_down(void); + int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index); void orangefs_bufmap_put(int buffer_index); diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 378cdcf43252..36eedd6a8335 100644 --- 
a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -155,67 +155,6 @@ retry_servicing: * system */ goto retry_servicing; - - /* op uses shared memory */ - if (orangefs_get_bufmap_init() == 0) { - WARN_ON(1); - /* - * This operation uses the shared memory system AND - * the system is not yet ready. This situation occurs - * when the client-core is restarted AND there were - * operations waiting to be processed or were already - * in process. - */ - gossip_debug(GOSSIP_WAIT_DEBUG, - "uses_shared_memory is true.\n"); - gossip_debug(GOSSIP_WAIT_DEBUG, - "Client core in-service status(%d).\n", - is_daemon_in_service()); - gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n", - orangefs_get_bufmap_init()); - gossip_debug(GOSSIP_WAIT_DEBUG, - "operation's status is 0x%0x.\n", - op->op_state); - - /* - * let process sleep for a few seconds so shared - * memory system can be initialized. - */ - prepare_to_wait(&orangefs_bufmap_init_waitq, - &wait_entry, - TASK_INTERRUPTIBLE); - - /* - * Wait for orangefs_bufmap_initialize() to wake me up - * within the allotted time. - */ - ret = schedule_timeout( - ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ); - - gossip_debug(GOSSIP_WAIT_DEBUG, - "Value returned from schedule_timeout:" - "%d.\n", - ret); - gossip_debug(GOSSIP_WAIT_DEBUG, - "Is shared memory available? (%d).\n", - orangefs_get_bufmap_init()); - - finish_wait(&orangefs_bufmap_init_waitq, &wait_entry); - - if (orangefs_get_bufmap_init() == 0) { - gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n", - __func__, - ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS, - get_opname_string(op)); - return -EIO; - } - - /* - * Return to the calling function and re-populate a - * shared memory buffer. 
- */ - return -EAGAIN; - } } out: From 82d37f19ff885ece97b8a072182e39c9dc4ead7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 21:04:51 -0500 Subject: [PATCH 124/174] orangefs_readdir_index_put(): get rid of bufmap argument Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 15 +++++++-------- fs/orangefs/orangefs-bufmap.c | 2 +- fs/orangefs/orangefs-bufmap.h | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 6f5836d6a7a3..c9b8d0ced833 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -170,8 +170,7 @@ static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, return ret; } -static void readdir_handle_dtor(struct orangefs_bufmap *bufmap, - struct readdir_handle_s *rhandle) +static void readdir_handle_dtor(struct readdir_handle_s *rhandle) { if (rhandle == NULL) return; @@ -181,7 +180,7 @@ static void readdir_handle_dtor(struct orangefs_bufmap *bufmap, rhandle->readdir_response.dirent_array = NULL; if (rhandle->buffer_index >= 0) { - orangefs_readdir_index_put(bufmap, rhandle->buffer_index); + orangefs_readdir_index_put(rhandle->buffer_index); rhandle->buffer_index = -1; } if (rhandle->dents_buf) { @@ -284,14 +283,14 @@ get_new_buffer_index: gossip_debug(GOSSIP_DIR_DEBUG, "%s: Getting new buffer_index for retry of readdir..\n", __func__); - orangefs_readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(buffer_index); goto get_new_buffer_index; } if (ret == -EIO && op_state_purged(new_op)) { gossip_err("%s: Client is down. Aborting readdir call.\n", __func__); - orangefs_readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(buffer_index); goto out_free_op; } @@ -299,7 +298,7 @@ get_new_buffer_index: gossip_debug(GOSSIP_DIR_DEBUG, "Readdir request failed. 
Status:%d\n", new_op->downcall.status); - orangefs_readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(buffer_index); if (ret >= 0) ret = new_op->downcall.status; goto out_free_op; @@ -314,7 +313,7 @@ get_new_buffer_index: gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", ret); ret = bytes_decoded; - orangefs_readdir_index_put(bufmap, buffer_index); + orangefs_readdir_index_put(buffer_index); goto out_free_op; } @@ -410,7 +409,7 @@ get_new_buffer_index: } out_destroy_handle: - readdir_handle_dtor(bufmap, &rhandle); + readdir_handle_dtor(&rhandle); out_free_op: op_release(new_op); gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 96faf4ee6529..44d437dbfce0 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -502,7 +502,7 @@ int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) return ret; } -void orangefs_readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index) +void orangefs_readdir_index_put(int buffer_index) { put(&readdir_map, buffer_index); } diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index f0684f0085d1..0be62be373f7 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -25,7 +25,7 @@ void orangefs_bufmap_put(int buffer_index); int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index); -void orangefs_readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index); +void orangefs_readdir_index_put(int buffer_index); int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, struct iov_iter *iter, From 897c5df6cf8c10d2557c098641faa62f65ef8598 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Feb 2016 21:06:50 -0500 Subject: [PATCH 125/174] orangefs: get rid of op->done shouldn't be needed now Signed-off-by: Al Viro Signed-off-by: Mike 
Marshall --- fs/orangefs/devorangefs-req.c | 13 ------------- fs/orangefs/file.c | 6 ++---- fs/orangefs/orangefs-cache.c | 2 -- fs/orangefs/orangefs-kernel.h | 2 -- 4 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 790855a72e32..b27ed1cb9a36 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -424,19 +424,6 @@ wakeup: * application reading/writing this device to return until * the buffers are done being used. */ - if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) { - long n = wait_for_completion_interruptible_timeout(&op->done, - op_timeout_secs * HZ); - if (unlikely(n < 0)) { - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: signal on I/O wait, aborting\n", - __func__); - } else if (unlikely(n == 0)) { - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: timed out.\n", - __func__); - } - } out: if (unlikely(op_is_cancel(op))) put_cancel(op); diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 3b1e9e83eb91..4eb009e8f19f 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -199,7 +199,7 @@ populate_shared_memory: if (orangefs_cancel_op_in_progress(new_op)) return ret; - goto done_copying; + goto out; } /* @@ -212,7 +212,7 @@ populate_shared_memory: iter, new_op->downcall.resp.io.amt_complete); if (ret < 0) - goto done_copying; + goto out; } gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): Amount written as returned by the sys-io call:%d\n", @@ -222,12 +222,10 @@ populate_shared_memory: ret = new_op->downcall.resp.io.amt_complete; -done_copying: /* * tell the device file owner waiting on I/O that this read has * completed and it can return now. 
*/ - complete(&new_op->done); out: if (buffer_index >= 0) { diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 09194e69875f..817092a14429 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -122,8 +122,6 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) atomic_set(&new_op->ref_count, 1); - init_completion(&new_op->done); - new_op->upcall.type = ORANGEFS_VFS_OP_INVALID; new_op->downcall.type = ORANGEFS_VFS_OP_INVALID; new_op->downcall.status = -1; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 8613d4166d0f..1d20eadaefd8 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -205,8 +205,6 @@ struct orangefs_kernel_op_s { struct completion waitq; spinlock_t lock; - struct completion done; - atomic_t ref_count; /* VFS aio fields */ From 5253487e0445d7bc9b7488e78aa3d65d4bbb158e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 16 Feb 2016 17:09:09 -0500 Subject: [PATCH 126/174] Orangefs: make some gossip statements more helpful. 
Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 4 ++-- fs/orangefs/namei.c | 44 +++++++++++++++++++++++++++-------------- fs/orangefs/waitqueue.c | 14 +++++++++---- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index d2923dc91388..4e923ece1e09 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -443,8 +443,8 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, int error; gossip_debug(GOSSIP_INODE_DEBUG, - "orangefs_get_custom_inode_common: called\n" - "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n", + "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n", + __func__, sb, MAJOR(dev), MINOR(dev), diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 8fc55c6f58db..b3ae3749a932 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -24,7 +24,9 @@ static int orangefs_create(struct inode *dir, struct inode *inode; int ret; - gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__); + gossip_debug(GOSSIP_NAME_DEBUG, "%s: %s\n", + __func__, + dentry->d_name.name); new_op = op_alloc(ORANGEFS_VFS_OP_CREATE); if (!new_op) @@ -41,35 +43,39 @@ static int orangefs_create(struct inode *dir, ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, - "Create Got ORANGEFS handle %pU on fsid %d (ret=%d)\n", + "%s: %s: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n", + __func__, + dentry->d_name.name, &new_op->downcall.resp.create.refn.khandle, - new_op->downcall.resp.create.refn.fs_id, ret); + new_op->downcall.resp.create.refn.fs_id, + new_op, + ret); - if (ret < 0) { - gossip_debug(GOSSIP_NAME_DEBUG, - "%s: failed with error code %d\n", - __func__, ret); + if (ret < 0) goto out; - } inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, &new_op->downcall.resp.create.refn); if (IS_ERR(inode)) { - gossip_err("*** Failed to allocate orangefs file inode\n"); + gossip_err("%s: Failed to allocate inode for file 
:%s:\n", + __func__, + dentry->d_name.name); ret = PTR_ERR(inode); goto out; } gossip_debug(GOSSIP_NAME_DEBUG, - "Assigned file inode new number of %pU\n", - get_khandle_from_ino(inode)); + "%s: Assigned inode :%pU: for file :%s:\n", + __func__, + get_khandle_from_ino(inode), + dentry->d_name.name); d_instantiate(dentry, inode); unlock_new_inode(inode); gossip_debug(GOSSIP_NAME_DEBUG, - "Inode (Regular File) %pU -> %s\n", - get_khandle_from_ino(inode), + "%s: dentry instantiated for %s\n", + __func__, dentry->d_name.name); SetMtimeFlag(parent); @@ -78,7 +84,11 @@ static int orangefs_create(struct inode *dir, ret = 0; out: op_release(new_op); - gossip_debug(GOSSIP_NAME_DEBUG, "%s: returning %d\n", __func__, ret); + gossip_debug(GOSSIP_NAME_DEBUG, + "%s: %s: returning %d\n", + __func__, + dentry->d_name.name, + ret); return ret; } @@ -229,7 +239,11 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); - /* when request is serviced properly, free req op struct */ + gossip_debug(GOSSIP_NAME_DEBUG, + "%s: service_operation returned:%d:\n", + __func__, + ret); + op_release(new_op); if (!ret) { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 36eedd6a8335..2c47f159d1d8 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -67,11 +67,10 @@ int service_operation(struct orangefs_kernel_op_s *op, retry_servicing: op->downcall.status = 0; gossip_debug(GOSSIP_WAIT_DEBUG, - "orangefs: service_operation: %s %p\n", + "%s: %s op:%p: process:%s: pid:%d:\n", + __func__, op_name, - op); - gossip_debug(GOSSIP_WAIT_DEBUG, - "orangefs: operation posted by process: %s, pid: %i\n", + op, current->comm, current->pid); @@ -122,6 +121,13 @@ retry_servicing: ret = wait_for_matching_downcall(op, timeout, flags & ORANGEFS_OP_INTERRUPTIBLE); + + gossip_debug(GOSSIP_WAIT_DEBUG, + "%s: wait_for_matching_downcall returned %d for %p\n", + __func__, + ret, + op); + 
if (!ret) { spin_unlock(&op->lock); /* got matching downcall; make sure status is in errno format */ From ddb84da38d0f050ff3582d5bb5e70cc7f2c6ef18 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 16 Feb 2016 17:10:28 -0500 Subject: [PATCH 127/174] Orangefs: remove vestigial ASYNC code Signed-off-by: Mike Marshall --- fs/orangefs/waitqueue.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 2c47f159d1d8..d980240b0fa7 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -112,13 +112,6 @@ retry_servicing: if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) mutex_unlock(&request_mutex); - /* - * If we are asked to service an asynchronous operation from - * VFS perspective, we are done. - */ - if (flags & ORANGEFS_OP_ASYNC) - return 0; - ret = wait_for_matching_downcall(op, timeout, flags & ORANGEFS_OP_INTERRUPTIBLE); From 5964c1b83912dd5052f66ceb50634df958129981 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Feb 2016 18:53:41 -0500 Subject: [PATCH 128/174] orangefs: set correct ->downcall.status on failing to copy reply from daemon ... 
and clean the end of control device ->write_iter() while we are at it Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 61 +++++++++++++---------------------- 1 file changed, 22 insertions(+), 39 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b27ed1cb9a36..89c282afeb29 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -333,8 +333,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, n = copy_from_iter(&op->downcall, downcall_size, iter); if (n != downcall_size) { gossip_err("%s: failed to copy downcall.\n", __func__); - ret = -EFAULT; - goto Broken; + goto Efault; } if (op->downcall.status) @@ -354,8 +353,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, downcall_size, op->downcall.trailer_size, total); - ret = -EFAULT; - goto Broken; + goto Efault; } /* Only READDIR operations should have trailers. */ @@ -364,8 +362,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, gossip_err("%s: %x operation with trailer.", __func__, op->downcall.type); - ret = -EFAULT; - goto Broken; + goto Efault; } /* READDIR operations should always have trailers. 
*/ @@ -374,8 +371,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, gossip_err("%s: %x operation with no trailer.", __func__, op->downcall.type); - ret = -EFAULT; - goto Broken; + goto Efault; } if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) @@ -386,8 +382,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (op->downcall.trailer_buf == NULL) { gossip_err("%s: failed trailer vmalloc.\n", __func__); - ret = -ENOMEM; - goto Broken; + goto Enomem; } memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); n = copy_from_iter(op->downcall.trailer_buf, @@ -396,8 +391,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (n != op->downcall.trailer_size) { gossip_err("%s: failed to copy trailer.\n", __func__); vfree(op->downcall.trailer_buf); - ret = -EFAULT; - goto Broken; + goto Efault; } wakeup: @@ -406,38 +400,27 @@ wakeup: * that this op is done */ spin_lock(&op->lock); - if (unlikely(op_state_given_up(op))) { + if (unlikely(op_is_cancel(op))) { spin_unlock(&op->lock); - goto out; - } - set_op_state_serviced(op); - spin_unlock(&op->lock); - - /* - * If this operation is an I/O operation we need to wait - * for all data to be copied before we can return to avoid - * buffer corruption and races that can pull the buffers - * out from under us. - * - * Essentially we're synchronizing with other parts of the - * vfs implicitly by not allowing the user space - * application reading/writing this device to return until - * the buffers are done being used. 
- */ -out: - if (unlikely(op_is_cancel(op))) put_cancel(op); + } else if (unlikely(op_state_given_up(op))) { + spin_unlock(&op->lock); + } else { + set_op_state_serviced(op); + spin_unlock(&op->lock); + } op_release(op); return ret; -Broken: - spin_lock(&op->lock); - if (!op_state_given_up(op)) { - op->downcall.status = ret; - set_op_state_serviced(op); - } - spin_unlock(&op->lock); - goto out; +Efault: + op->downcall.status = -(ORANGEFS_ERROR_BIT | 9); + ret = -EFAULT; + goto wakeup; + +Enomem: + op->downcall.status = -(ORANGEFS_ERROR_BIT | 8); + ret = -ENOMEM; + goto wakeup; } /* Returns whether any FS are still pending remounted */ From 05a50a5be897004b6c1399645256bcf2e768b4ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Feb 2016 18:59:44 -0500 Subject: [PATCH 129/174] orangefs: have ..._clean_interrupted_...() wait for copy to/from daemon * turn all those list_del(&op->list) into list_del_init() * don't pick ops that are already given up in control device ->read()/->write_iter(). * have orangefs_clean_interrupted_operation() notice if op is currently being copied to/from daemon (by said ->read()/->write_iter()) and wait for that to finish. * when we are done copying to/from daemon and find that it had been given up while we were doing that, wake the waiting ..._clean_interrupted_... As the result, we are guaranteed that orangefs_clean_interrupted_operation(op) doesn't return until nobody else can see op. Moreover, we don't need to play with op refcounts anymore. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 20 ++++++++++---------- fs/orangefs/orangefs-kernel.h | 2 +- fs/orangefs/waitqueue.c | 22 ++++++++++------------ 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 89c282afeb29..f7914f5d296f 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -58,9 +58,9 @@ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) next, &htable_ops_in_progress[index], list) { - if (op->tag == tag && !op_state_purged(op)) { + if (op->tag == tag && !op_state_purged(op) && + !op_state_given_up(op)) { list_del_init(&op->list); - get_op(op); /* increase ref count. */ spin_unlock(&htable_ops_in_progress_lock); return op; } @@ -133,7 +133,7 @@ restart: __s32 fsid; /* This lock is held past the end of the loop when we break. */ spin_lock(&op->lock); - if (unlikely(op_state_purged(op))) { + if (unlikely(op_state_purged(op) || op_state_given_up(op))) { spin_unlock(&op->lock); continue; } @@ -199,13 +199,12 @@ restart: */ if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) { gossip_err("orangefs: ERROR: Current op already queued.\n"); - list_del(&cur_op->list); + list_del_init(&cur_op->list); spin_unlock(&cur_op->lock); spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } list_del_init(&cur_op->list); - get_op(op); spin_unlock(&orangefs_request_list_lock); spin_unlock(&cur_op->lock); @@ -230,7 +229,7 @@ restart: if (unlikely(op_state_given_up(cur_op))) { spin_unlock(&cur_op->lock); spin_unlock(&htable_ops_in_progress_lock); - op_release(cur_op); + complete(&cur_op->waitq); goto restart; } @@ -242,7 +241,6 @@ restart: orangefs_devreq_add_op(cur_op); spin_unlock(&cur_op->lock); spin_unlock(&htable_ops_in_progress_lock); - op_release(cur_op); /* The client only asks to read one size buffer. 
*/ return MAX_DEV_REQ_UPSIZE; @@ -258,10 +256,12 @@ error: if (likely(!op_state_given_up(cur_op))) { set_op_state_waiting(cur_op); list_add(&cur_op->list, &orangefs_request_list); + spin_unlock(&cur_op->lock); + } else { + spin_unlock(&cur_op->lock); + complete(&cur_op->waitq); } - spin_unlock(&cur_op->lock); spin_unlock(&orangefs_request_list_lock); - op_release(cur_op); return -EFAULT; } @@ -405,11 +405,11 @@ wakeup: put_cancel(op); } else if (unlikely(op_state_given_up(op))) { spin_unlock(&op->lock); + complete(&op->waitq); } else { set_op_state_serviced(op); spin_unlock(&op->lock); } - op_release(op); return ret; Efault: diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 1d20eadaefd8..7d0c8b3afc7e 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -259,7 +259,7 @@ static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) { spin_lock(&op->lock); if (unlikely(op_is_cancel(op))) { - list_del(&op->list); + list_del_init(&op->list); spin_unlock(&op->lock); put_cancel(op); } else { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index d980240b0fa7..3f9e43066444 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -208,15 +208,20 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s * Called with op->lock held. */ op->op_state |= OP_VFS_STATE_GIVEN_UP; - - if (op_state_waiting(op)) { + /* from that point on it can't be moved by anybody else */ + if (list_empty(&op->list)) { + /* caught copying to/from daemon */ + BUG_ON(op_state_serviced(op)); + spin_unlock(&op->lock); + wait_for_completion(&op->waitq); + } else if (op_state_waiting(op)) { /* * upcall hasn't been read; remove op from upcall request * list. 
*/ spin_unlock(&op->lock); spin_lock(&orangefs_request_list_lock); - list_del(&op->list); + list_del_init(&op->list); spin_unlock(&orangefs_request_list_lock); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p from request_list\n", @@ -225,23 +230,16 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s /* op must be removed from the in progress htable */ spin_unlock(&op->lock); spin_lock(&htable_ops_in_progress_lock); - list_del(&op->list); + list_del_init(&op->list); spin_unlock(&htable_ops_in_progress_lock); gossip_debug(GOSSIP_WAIT_DEBUG, "Interrupted: Removed op %p" " from htable_ops_in_progress\n", op); - } else if (!op_state_serviced(op)) { + } else { spin_unlock(&op->lock); gossip_err("interrupted operation is in a weird state 0x%x\n", op->op_state); - } else { - /* - * It is not intended for execution to flow here, - * but having this unlock here makes sparse happy. - */ - gossip_err("%s: can't get here.\n", __func__); - spin_unlock(&op->lock); } reinit_completion(&op->waitq); } From c1223ca48baa867e9abc77fbb7f97500dc2a0cf8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Feb 2016 19:17:51 -0500 Subject: [PATCH 130/174] orangefs: get rid of op refcounts not needed anymore Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-cache.c | 4 +--- fs/orangefs/orangefs-kernel.h | 20 +------------------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 817092a14429..900a2e38e11b 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -120,8 +120,6 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) spin_lock_init(&new_op->lock); init_completion(&new_op->waitq); - atomic_set(&new_op->ref_count, 1); - new_op->upcall.type = ORANGEFS_VFS_OP_INVALID; new_op->downcall.type = ORANGEFS_VFS_OP_INVALID; new_op->downcall.status = -1; @@ -149,7 +147,7 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) 
return new_op; } -void __op_release(struct orangefs_kernel_op_s *orangefs_op) +void op_release(struct orangefs_kernel_op_s *orangefs_op) { if (orangefs_op) { gossip_debug(GOSSIP_CACHE_DEBUG, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 7d0c8b3afc7e..6290c24d8270 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -205,8 +205,6 @@ struct orangefs_kernel_op_s { struct completion waitq; spinlock_t lock; - atomic_t ref_count; - /* VFS aio fields */ int attempts; @@ -230,23 +228,7 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) #define op_state_given_up(op) ((op)->op_state & OP_VFS_STATE_GIVEN_UP) #define op_is_cancel(op) ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL) -static inline void get_op(struct orangefs_kernel_op_s *op) -{ - atomic_inc(&op->ref_count); - gossip_debug(GOSSIP_DEV_DEBUG, - "(get) Alloced OP (%p:%llu)\n", op, llu(op->tag)); -} - -void __op_release(struct orangefs_kernel_op_s *op); - -static inline void op_release(struct orangefs_kernel_op_s *op) -{ - if (atomic_dec_and_test(&op->ref_count)) { - gossip_debug(GOSSIP_DEV_DEBUG, - "(put) Releasing OP (%p:%llu)\n", op, llu((op)->tag)); - __op_release(op); - } -} +void op_release(struct orangefs_kernel_op_s *op); extern void orangefs_bufmap_put(int); static inline void put_cancel(struct orangefs_kernel_op_s *op) From d37c0f307adb1d15712cb8d3cec23d81389c1937 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 24 Feb 2016 13:24:14 -0500 Subject: [PATCH 131/174] Orangefs: clean up orangefs_kernel_op_s comments. 
Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 6290c24d8270..4ed64e555ca0 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -188,11 +188,12 @@ struct orangefs_kernel_op_s { __u64 tag; /* - * Set uses_shared_memory to 1 if this operation uses shared memory. - * If true, then a retry on the op must also get a new shared memory - * buffer and re-populate it. Cancels don't care - it only matters - * for service_operation() retry logics and cancels don't go through - * it anymore. + * Set uses_shared_memory to non zero if this operation uses + * shared memory. If true, then a retry on the op must also + * get a new shared memory buffer and re-populate it. + * Cancels don't care - it only matters for service_operation() + * retry logics and cancels don't go through it anymore. It + * safely stays non-zero when we use it as slot_to_free. 
*/ union { int uses_shared_memory; @@ -205,8 +206,6 @@ struct orangefs_kernel_op_s { struct completion waitq; spinlock_t lock; - /* VFS aio fields */ - int attempts; struct list_head list; From adcf34a2893386c99e80feee36e30a782b3815e7 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 24 Feb 2016 16:54:27 -0500 Subject: [PATCH 132/174] Orangefs: code sanitation Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 2 +- fs/orangefs/orangefs-kernel.h | 2 +- fs/orangefs/super.c | 4 ++-- fs/orangefs/waitqueue.c | 35 +++++++++++++++++++++++++---------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index f7914f5d296f..0db3a57f974d 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -590,7 +590,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) * remount all mounted orangefs volumes to regain the lost * dynamic mount tables (if any) -- NOTE: this is done * without keeping the superblock list locked due to the - * upcall/downcall waiting. also, the request semaphore is + * upcall/downcall waiting. 
also, the request mutex is * used to ensure that no operations will be serviced until * all of the remounts are serviced (to avoid ops between * mounts to fail) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 4ed64e555ca0..c3b3b22115eb 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -603,7 +603,7 @@ extern wait_queue_head_t orangefs_bufmap_init_waitq; #define ORANGEFS_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ #define ORANGEFS_OP_PRIORITY 2 /* service_operation() is high priority */ #define ORANGEFS_OP_CANCELLATION 4 /* this is a cancellation */ -#define ORANGEFS_OP_NO_SEMAPHORE 8 /* don't acquire semaphore */ +#define ORANGEFS_OP_NO_MUTEX 8 /* don't acquire request_mutex */ #define ORANGEFS_OP_ASYNC 16 /* Queue it, but don't wait */ int service_operation(struct orangefs_kernel_op_s *op, diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 93cc352be360..65ddc74e96b6 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -229,12 +229,12 @@ int orangefs_remount(struct super_block *sb) new_op->upcall.req.fs_mount.orangefs_config_server); /* - * we assume that the calling function has already acquire the + * we assume that the calling function has already acquired the * request_mutex to prevent other operations from bypassing * this one */ ret = service_operation(new_op, "orangefs_remount", - ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_SEMAPHORE); + ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX); gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount: mount got return value of %d\n", ret); diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 3f9e43066444..1eadf69cc919 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -56,7 +56,6 @@ int service_operation(struct orangefs_kernel_op_s *op, int flags) { long timeout = MAX_SCHEDULE_TIMEOUT; - /* flags to modify behavior */ int ret = 0; DEFINE_WAIT(wait_entry); @@ -74,14 +73,20 @@ retry_servicing: 
current->comm, current->pid); - if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) { + /* + * If ORANGEFS_OP_NO_MUTEX was set in flags, we need to avoid + * aquiring the request_mutex because we're servicing a + * high priority remount operation and the request_mutex is + * already taken. + */ + if (!(flags & ORANGEFS_OP_NO_MUTEX)) { if (flags & ORANGEFS_OP_INTERRUPTIBLE) ret = mutex_lock_interruptible(&request_mutex); else ret = mutex_lock_killable(&request_mutex); /* * check to see if we were interrupted while waiting for - * semaphore + * mutex */ if (ret < 0) { op->downcall.status = ret; @@ -95,6 +100,7 @@ retry_servicing: spin_lock(&orangefs_request_list_lock); spin_lock(&op->lock); set_op_state_waiting(op); + /* add high priority remount op to the front of the line. */ if (flags & ORANGEFS_OP_PRIORITY) list_add(&op->list, &orangefs_request_list); else @@ -109,7 +115,7 @@ retry_servicing: } spin_unlock(&orangefs_request_list_lock); - if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) + if (!(flags & ORANGEFS_OP_NO_MUTEX)) mutex_unlock(&request_mutex); ret = wait_for_matching_downcall(op, timeout, @@ -132,10 +138,17 @@ retry_servicing: /* failed to get matching downcall */ if (ret == -ETIMEDOUT) { - gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n", + gossip_err("%s: %s -- wait timed out; aborting attempt.\n", + __func__, op_name); } + + /* + * remove waiting ops from the request list or + * remove in-progress ops from the in-progress list. + */ orangefs_clean_up_interrupted_operation(op); + op->downcall.status = ret; /* retry if operation has not been serviced and if requested */ if (ret == -EAGAIN) { @@ -148,11 +161,12 @@ retry_servicing: op_name, op->attempts); + /* + * io ops (ops that use the shared memory buffer) have + * to be returned to their caller for a retry. Other ops + * can just be recycled here. 
+ */ if (!op->uses_shared_memory) - /* - * this operation doesn't use the shared memory - * system - */ goto retry_servicing; } @@ -268,7 +282,8 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, long n; if (interruptible) - n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); + n = wait_for_completion_interruptible_timeout(&op->waitq, + timeout); else n = wait_for_completion_killable_timeout(&op->waitq, timeout); From ee3b8d377ca014b0ec4ea8988ba6fbe8463bd8d9 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 17 Feb 2016 12:55:42 -0500 Subject: [PATCH 133/174] orangefs: free readdir buffer index before the dir_emit loop We only need it while the service operation is actually in progress since it is only used to co-ordinate the client-core's memory use. The kernel allocates its own space. Also clean up some comments which mislead the reader into thinking the readdir buffers are shared memory. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index c9b8d0ced833..43e3aeb529ea 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -9,7 +9,6 @@ #include "orangefs-bufmap.h" struct readdir_handle_s { - int buffer_index; struct orangefs_readdir_response_s readdir_response; void *dents_buf; }; @@ -143,7 +142,7 @@ out: } static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, - size_t size, int buffer_index) + size_t size) { long ret; @@ -152,17 +151,10 @@ static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, ("Invalid NULL buffer specified in readdir_handle_ctor\n"); return -ENOMEM; } - if (buffer_index < 0) { - gossip_err - ("Invalid buffer index specified in readdir_handle_ctor\n"); - return -EINVAL; - } - rhandle->buffer_index = buffer_index; rhandle->dents_buf = buf; ret = decode_dirents(buf, size, 
&rhandle->readdir_response); if (ret < 0) { gossip_err("Could not decode readdir from buffer %ld\n", ret); - rhandle->buffer_index = -1; gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); vfree(buf); rhandle->dents_buf = NULL; @@ -179,10 +171,6 @@ static void readdir_handle_dtor(struct readdir_handle_s *rhandle) kfree(rhandle->readdir_response.dirent_array); rhandle->readdir_response.dirent_array = NULL; - if (rhandle->buffer_index >= 0) { - orangefs_readdir_index_put(rhandle->buffer_index); - rhandle->buffer_index = -1; - } if (rhandle->dents_buf) { gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", rhandle->dents_buf); @@ -236,7 +224,6 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) "orangefs_readdir called on %s (pos=%llu)\n", dentry->d_name.name, llu(pos)); - rhandle.buffer_index = -1; rhandle.dents_buf = NULL; memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); @@ -244,6 +231,10 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) if (!new_op) return -ENOMEM; + /* + * Only the indices are shared. No memory is actually shared, but the + * mechanism is used. + */ new_op->uses_shared_memory = 1; new_op->upcall.req.readdir.refn = orangefs_inode->refn; new_op->upcall.req.readdir.max_dirent_count = @@ -274,23 +265,19 @@ get_new_buffer_index: new_op->downcall.status, ret); + orangefs_readdir_index_put(buffer_index); + if (ret == -EAGAIN && op_state_purged(new_op)) { - /* - * readdir shared memory aread has been wiped due to - * pvfs2-client-core restarting, so we must get a new - * index into the shared memory. - */ + /* Client-core indices are invalid after it restarted. */ gossip_debug(GOSSIP_DIR_DEBUG, "%s: Getting new buffer_index for retry of readdir..\n", __func__); - orangefs_readdir_index_put(buffer_index); goto get_new_buffer_index; } if (ret == -EIO && op_state_purged(new_op)) { gossip_err("%s: Client is down. 
Aborting readdir call.\n", __func__); - orangefs_readdir_index_put(buffer_index); goto out_free_op; } @@ -298,7 +285,6 @@ get_new_buffer_index: gossip_debug(GOSSIP_DIR_DEBUG, "Readdir request failed. Status:%d\n", new_op->downcall.status); - orangefs_readdir_index_put(buffer_index); if (ret >= 0) ret = new_op->downcall.status; goto out_free_op; @@ -307,13 +293,11 @@ get_new_buffer_index: bytes_decoded = readdir_handle_ctor(&rhandle, new_op->downcall.trailer_buf, - new_op->downcall.trailer_size, - buffer_index); + new_op->downcall.trailer_size); if (bytes_decoded < 0) { gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", ret); ret = bytes_decoded; - orangefs_readdir_index_put(buffer_index); goto out_free_op; } From ee70fca0bc9a4a85c239e4f08b7ebf8351d2a733 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 20 Feb 2016 13:10:47 -0500 Subject: [PATCH 134/174] orangefs: don't d_drop in d_revalidate since the caller will Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index a6911dbbf3e5..3c1703fbb60e 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -82,7 +82,6 @@ out_put_parent: out_drop: gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n", __FILE__, __func__, __LINE__); - d_drop(dentry); goto out_release_op; } @@ -109,10 +108,8 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) * If this passes, the positive dentry still exists or the negative * dentry still does not exist. */ - if (!orangefs_revalidate_lookup(dentry)) { - d_drop(dentry); + if (!orangefs_revalidate_lookup(dentry)) return 0; - } /* We do not need to continue with negative dentries. 
*/ if (!dentry->d_inode) @@ -125,13 +122,10 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) if (ret < 0) { gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n", __FILE__, __func__, __LINE__); - d_drop(dentry); return 0; } - if (ret == 0) { - d_drop(dentry); + if (ret == 0) return 0; - } out: gossip_debug(GOSSIP_DCACHE_DEBUG, From 47b4948fdb1055cd8d8f86aebb0b3fcb06ba36d5 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 20 Feb 2016 14:22:40 -0500 Subject: [PATCH 135/174] orangefs: use ORANGEFS_NAME_MAX everywhere; remove ORANGEFS_NAME_LEN Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 2 +- fs/orangefs/downcall.h | 2 +- fs/orangefs/namei.c | 18 +++++++++--------- fs/orangefs/orangefs-dev-proto.h | 1 - fs/orangefs/super.c | 2 +- fs/orangefs/upcall.h | 16 ++++++++-------- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 3c1703fbb60e..2de92b71d861 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -32,7 +32,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_LEN); + ORANGEFS_NAME_MAX); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h index 72d4cac54821..66b99210f1f9 100644 --- a/fs/orangefs/downcall.h +++ b/fs/orangefs/downcall.h @@ -33,7 +33,7 @@ struct orangefs_symlink_response { struct orangefs_getattr_response { struct ORANGEFS_sys_attr_s attributes; - char link_target[ORANGEFS_NAME_LEN]; + char link_target[ORANGEFS_NAME_MAX]; }; struct orangefs_mkdir_response { diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index b3ae3749a932..f26f6694c48d 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -38,7 +38,7 @@ static int orangefs_create(struct inode *dir, 
ORANGEFS_TYPE_METAFILE, mode); strncpy(new_op->upcall.req.create.d_name, - dentry->d_name.name, ORANGEFS_NAME_LEN); + dentry->d_name.name, ORANGEFS_NAME_MAX); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -116,7 +116,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n", __func__, dentry->d_name.name); - if (dentry->d_name.len > (ORANGEFS_NAME_LEN - 1)) + if (dentry->d_name.len > (ORANGEFS_NAME_MAX - 1)) return ERR_PTR(-ENAMETOOLONG); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); @@ -133,7 +133,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_LEN); + ORANGEFS_NAME_MAX); gossip_debug(GOSSIP_NAME_DEBUG, "%s: doing lookup on %s under %pU,%d (follow=%s)\n", @@ -234,7 +234,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) new_op->upcall.req.remove.parent_refn = parent->refn; strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, - ORANGEFS_NAME_LEN); + ORANGEFS_NAME_MAX); ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); @@ -283,8 +283,8 @@ static int orangefs_symlink(struct inode *dir, strncpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name, - ORANGEFS_NAME_LEN); - strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_LEN); + ORANGEFS_NAME_MAX); + strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -347,7 +347,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode ORANGEFS_TYPE_DIRECTORY, mode); strncpy(new_op->upcall.req.mkdir.d_name, - dentry->d_name.name, ORANGEFS_NAME_LEN); + dentry->d_name.name, ORANGEFS_NAME_MAX); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ 
-420,10 +420,10 @@ static int orangefs_rename(struct inode *old_dir, strncpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name, - ORANGEFS_NAME_LEN); + ORANGEFS_NAME_MAX); strncpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name, - ORANGEFS_NAME_LEN); + ORANGEFS_NAME_MAX); ret = service_operation(new_op, "orangefs_rename", diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h index 5a8725a88eac..9eac9d9a3f3a 100644 --- a/fs/orangefs/orangefs-dev-proto.h +++ b/fs/orangefs/orangefs-dev-proto.h @@ -46,7 +46,6 @@ * Misc constants. Please retain them as multiples of 8! * Otherwise 32-64 bit interactions will be messed up :) */ -#define ORANGEFS_NAME_LEN 0x00000100 #define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000400 #define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800 diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 65ddc74e96b6..eac24eb7fe80 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -170,7 +170,7 @@ static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = sb->s_magic; memcpy(&buf->f_fsid, &ORANGEFS_SB(sb)->fs_id, sizeof(buf->f_fsid)); buf->f_bsize = new_op->downcall.resp.statfs.block_size; - buf->f_namelen = ORANGEFS_NAME_LEN; + buf->f_namelen = ORANGEFS_NAME_MAX; buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total; buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail; diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h index 781cbc38523a..af6dcac180ab 100644 --- a/fs/orangefs/upcall.h +++ b/fs/orangefs/upcall.h @@ -27,20 +27,20 @@ struct orangefs_lookup_request_s { __s32 sym_follow; __s32 __pad1; struct orangefs_object_kref parent_refn; - char d_name[ORANGEFS_NAME_LEN]; + char d_name[ORANGEFS_NAME_MAX]; }; struct orangefs_create_request_s { struct orangefs_object_kref parent_refn; struct ORANGEFS_sys_attr_s attributes; - char d_name[ORANGEFS_NAME_LEN]; + char d_name[ORANGEFS_NAME_MAX]; }; struct orangefs_symlink_request_s { struct 
orangefs_object_kref parent_refn; struct ORANGEFS_sys_attr_s attributes; - char entry_name[ORANGEFS_NAME_LEN]; - char target[ORANGEFS_NAME_LEN]; + char entry_name[ORANGEFS_NAME_MAX]; + char target[ORANGEFS_NAME_MAX]; }; struct orangefs_getattr_request_s { @@ -56,13 +56,13 @@ struct orangefs_setattr_request_s { struct orangefs_remove_request_s { struct orangefs_object_kref parent_refn; - char d_name[ORANGEFS_NAME_LEN]; + char d_name[ORANGEFS_NAME_MAX]; }; struct orangefs_mkdir_request_s { struct orangefs_object_kref parent_refn; struct ORANGEFS_sys_attr_s attributes; - char d_name[ORANGEFS_NAME_LEN]; + char d_name[ORANGEFS_NAME_MAX]; }; struct orangefs_readdir_request_s { @@ -84,8 +84,8 @@ struct orangefs_readdirplus_request_s { struct orangefs_rename_request_s { struct orangefs_object_kref old_parent_refn; struct orangefs_object_kref new_parent_refn; - char d_old_name[ORANGEFS_NAME_LEN]; - char d_new_name[ORANGEFS_NAME_LEN]; + char d_old_name[ORANGEFS_NAME_MAX]; + char d_new_name[ORANGEFS_NAME_MAX]; }; struct orangefs_statfs_request_s { From 9c2bcf288ed7f600bc3f8e7541ea0522a8912bae Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 20 Feb 2016 14:26:01 -0500 Subject: [PATCH 136/174] orangefs: remove vestigial async io code I have verified that there is nothing in the userspace daemon version we are implementing this protocol against that ever looks at this field. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 1 - fs/orangefs/orangefs-kernel.h | 8 -------- fs/orangefs/upcall.h | 4 ++-- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 4eb009e8f19f..399d5288dc1a 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -102,7 +102,6 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod return -ENOMEM; /* synchronous I/O */ - new_op->upcall.req.io.async_vfs_io = ORANGEFS_VFS_SYNC_IO; new_op->upcall.req.io.readahead_size = readahead_size; new_op->upcall.req.io.io_type = type; new_op->upcall.req.io.refn = orangefs_inode->refn; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index c3b3b22115eb..91cbd53f782c 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -100,14 +100,6 @@ enum orangefs_vfs_op_states { OP_VFS_STATE_GIVEN_UP = 16, }; -/* - * Defines for controlling whether I/O upcalls are for async or sync operations - */ -enum ORANGEFS_async_io_type { - ORANGEFS_VFS_SYNC_IO = 0, - ORANGEFS_VFS_ASYNC_IO = 1, -}; - /* * An array of client_debug_mask will be built to hold debug keyword/mask * values fetched from userspace. 
diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h index af6dcac180ab..001b20239407 100644 --- a/fs/orangefs/upcall.h +++ b/fs/orangefs/upcall.h @@ -13,10 +13,10 @@ * client-core and device */ struct orangefs_io_request_s { - __s32 async_vfs_io; + __s32 __pad1; __s32 buf_index; __s32 count; - __s32 __pad1; + __s32 __pad2; __s64 offset; struct orangefs_object_kref refn; enum ORANGEFS_io_type io_type; From 6ceaf7818f266d917ed61338885ddd2b77008f06 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 20 Feb 2016 14:47:13 -0500 Subject: [PATCH 137/174] orangefs: we never lookup with sym_follow set Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/namei.c | 6 ++---- fs/orangefs/protocol.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index f26f6694c48d..650ff299738b 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -136,13 +136,11 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, ORANGEFS_NAME_MAX); gossip_debug(GOSSIP_NAME_DEBUG, - "%s: doing lookup on %s under %pU,%d (follow=%s)\n", + "%s: doing lookup on %s under %pU,%d\n", __func__, new_op->upcall.req.lookup.d_name, &new_op->upcall.req.lookup.parent_refn.khandle, - new_op->upcall.req.lookup.parent_refn.fs_id, - ((new_op->upcall.req.lookup.sym_follow == - ORANGEFS_LOOKUP_LINK_FOLLOW) ? 
"yes" : "no")); + new_op->upcall.req.lookup.parent_refn.fs_id); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 6ac0c60c9f5e..45ce4ff4cbc7 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -339,7 +339,6 @@ struct ORANGEFS_sys_attr_s { }; #define ORANGEFS_LOOKUP_LINK_NO_FOLLOW 0 -#define ORANGEFS_LOOKUP_LINK_FOLLOW 1 /* pint-dev.h ***************************************************************/ From 69a23de2f3de046f1017489eb9e6de4e8165e4f0 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 20 Feb 2016 15:28:58 -0500 Subject: [PATCH 138/174] orangefs: clean up fill_default_sys_attrs Size and type are read-only and not in the mask. The times were left unset despite being in the mask. We zero-fill the times since the server will fill them in and we will get the correct time when we fill the inode with getattr. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 91cbd53f782c..785c9a4ef834 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -645,9 +645,10 @@ do { \ do { \ sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \ sys_attr.group = from_kgid(current_user_ns(), current_fsgid()); \ - sys_attr.size = 0; \ sys_attr.perms = ORANGEFS_util_translate_mode(mode); \ - sys_attr.objtype = type; \ + sys_attr.mtime = 0; \ + sys_attr.atime = 0; \ + sys_attr.ctime = 0; \ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) From be81ce48b262e2164d64a1354c618571b0c9cd09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2016 13:54:10 +0100 Subject: [PATCH 139/174] orangefs: avoid time conversion function The new orangefs code uses a helper function to read a time field to its private structures from struct iattr. 
This will conflict with the move to 64-bit timestamps in the kernel and is generally not necessary. This replaces the conversion with a simple cast to time64_t that shows what is going on. As the orangefs-internal representation already uses 64-bit timestamps, there should be no ambiguity to negative values, and the cast ensures that we treat them as times before 1970 on both 32-bit and 64-bit architectures, rather than times after 2038. This patch keeps that behavior. Signed-off-by: Arnd Bergmann Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 5 ----- fs/orangefs/orangefs-utils.c | 12 +++++------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 785c9a4ef834..b6f52e3fee7f 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -555,11 +555,6 @@ int orangefs_unmount_sb(struct super_block *sb); bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); -static inline __u64 orangefs_convert_time_field(const struct timespec *ts) -{ - return (__u64)ts->tv_sec; -} - int orangefs_normalize_to_errno(__s32 error_code); extern struct mutex devreq_mutex; diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 488f3501b09c..8ef9e9646748 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -202,9 +202,9 @@ static int copy_attributes_to_inode(struct inode *inode, inode->i_uid = make_kuid(&init_user_ns, attrs->owner); inode->i_gid = make_kgid(&init_user_ns, attrs->group); - inode->i_atime.tv_sec = (time_t) attrs->atime; - inode->i_mtime.tv_sec = (time_t) attrs->mtime; - inode->i_ctime.tv_sec = (time_t) attrs->ctime; + inode->i_atime.tv_sec = (time64_t) attrs->atime; + inode->i_mtime.tv_sec = (time64_t) attrs->mtime; + inode->i_ctime.tv_sec = (time64_t) attrs->ctime; inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; @@ -301,16 +301,14 @@ static inline int 
copy_attributes_from_inode(struct inode *inode, if (iattr->ia_valid & ATTR_ATIME) { attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; if (iattr->ia_valid & ATTR_ATIME_SET) { - attrs->atime = - orangefs_convert_time_field(&iattr->ia_atime); + attrs->atime = (time64_t)iattr->ia_atime.tv_sec; attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; } } if (iattr->ia_valid & ATTR_MTIME) { attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; if (iattr->ia_valid & ATTR_MTIME_SET) { - attrs->mtime = - orangefs_convert_time_field(&iattr->ia_mtime); + attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec; attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; } } From 401898eed7e05b8a898a44b49d5e9a510aebee83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2016 13:54:09 +0100 Subject: [PATCH 140/174] orangefs: remove unused 'diff' function orangefs contains a helper function to calculate the difference between two timeval structures. We are trying to remove all instances of timeval from the kernel, and this one is not used at all, so let's remove it now. Signed-off-by: Arnd Bergmann Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index b6f52e3fee7f..c045c0b89507 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -662,15 +662,4 @@ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) #endif } -static inline unsigned int diff(struct timeval *end, struct timeval *begin) -{ - if (end->tv_usec < begin->tv_usec) { - end->tv_usec += 1000000; - end->tv_sec--; - } - end->tv_sec -= begin->tv_sec; - end->tv_usec -= begin->tv_usec; - return (end->tv_sec * 1000000) + end->tv_usec; -} - #endif /* __ORANGEFSKERNEL_H */ From ca9f518eadeb7edd8e438a6542d3caec9bc3bb74 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 26 Feb 2016 10:21:12 -0500 Subject: [PATCH 141/174] Orangefs: code sanitation. 
Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 20 ++++++++-------- fs/orangefs/orangefs-mod.c | 11 +++++---- fs/orangefs/waitqueue.c | 44 ++++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 0db3a57f974d..e3934c06b96a 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -46,6 +46,10 @@ static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op) list_add_tail(&op->list, &htable_ops_in_progress[index]); } +/* + * find the op with this tag and remove it from the in progress + * hash table. + */ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) { struct orangefs_kernel_op_s *op, *next; @@ -190,8 +194,10 @@ restart: return -EAGAIN; } - gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n", - llu(cur_op->tag), get_opname_string(cur_op)); + gossip_debug(GOSSIP_DEV_DEBUG, "%s: reading op tag %llu %s\n", + __func__, + llu(cur_op->tag), + get_opname_string(cur_op)); /* * Such an op should never be on the list in the first place. If so, we @@ -204,6 +210,7 @@ restart: spin_unlock(&orangefs_request_list_lock); return -EAGAIN; } + list_del_init(&cur_op->list); spin_unlock(&orangefs_request_list_lock); @@ -323,6 +330,7 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, return -EPROTO; } + /* remove the op from the in progress hash table */ op = orangefs_devreq_remove_op(head.tag); if (!op) { gossip_err("WARNING: No one's waiting for tag %llu\n", @@ -486,15 +494,7 @@ static int orangefs_devreq_release(struct inode *inode, struct file *file) gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n", (unmounted ? "UNMOUNTED" : "MOUNTED")); - /* - * Walk through the list of ops in the request list, mark them - * as purged and wake them up. 
- */ purge_waiting_ops(); - /* - * Walk through the hash table of in progress operations; mark - * them as purged and wake them up - */ purge_inprogress_ops(); orangefs_bufmap_run_down(); diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 965959cb11d1..a4e08dd3e669 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -119,10 +119,10 @@ static int __init orangefs_init(void) if (gossip_debug_mask != 0) kernel_mask_set_mod_init = true; - /* print information message to the system log */ - pr_info("orangefs: orangefs_init called with debug mask: :%s: :%llx:\n", - kernel_debug_string, - (unsigned long long)gossip_debug_mask); + pr_info("%s: called with debug mask: :%s: :%llx:\n", + __func__, + kernel_debug_string, + (unsigned long long)gossip_debug_mask); ret = bdi_init(&orangefs_backing_dev_info); @@ -147,7 +147,8 @@ static int __init orangefs_init(void) /* Initialize the orangefsdev subsystem. */ ret = orangefs_dev_init(); if (ret < 0) { - gossip_err("orangefs: could not initialize device subsystem %d!\n", + gossip_err("%s: could not initialize device subsystem %d!\n", + __func__, ret); goto cleanup_inode; } diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 1eadf69cc919..edfd921cf6ec 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -75,7 +75,7 @@ retry_servicing: /* * If ORANGEFS_OP_NO_MUTEX was set in flags, we need to avoid - * aquiring the request_mutex because we're servicing a + * acquiring the request_mutex because we're servicing a * high priority remount operation and the request_mutex is * already taken. 
*/ @@ -91,7 +91,8 @@ retry_servicing: if (ret < 0) { op->downcall.status = ret; gossip_debug(GOSSIP_WAIT_DEBUG, - "orangefs: service_operation interrupted.\n"); + "%s: service_operation interrupted.\n", + __func__); return ret; } } @@ -127,9 +128,9 @@ retry_servicing: ret, op); + /* got matching downcall; make sure status is in errno format */ if (!ret) { spin_unlock(&op->lock); - /* got matching downcall; make sure status is in errno format */ op->downcall.status = orangefs_normalize_to_errno(op->downcall.status); ret = op->downcall.status; @@ -144,8 +145,8 @@ retry_servicing: } /* - * remove waiting ops from the request list or - * remove in-progress ops from the in-progress list. + * remove a waiting op from the request list or + * remove an in-progress op from the in-progress list. */ orangefs_clean_up_interrupted_operation(op); @@ -179,6 +180,7 @@ out: return ret; } +/* This can get called on an I/O op if it had a bad service_operation. */ bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) { u64 tag = op->tag; @@ -206,23 +208,31 @@ bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) spin_unlock(&op->lock); spin_unlock(&orangefs_request_list_lock); - gossip_debug(GOSSIP_UTILS_DEBUG, + gossip_debug(GOSSIP_WAIT_DEBUG, "Attempting ORANGEFS operation cancellation of tag %llu\n", llu(tag)); return true; } -static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) +/* + * Change an op to the "given up" state and remove it from its list. + */ +static void + orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* * handle interrupted cases depending on what state we were in when - * the interruption is detected. there is a coarse grained lock - * across the operation. + * the interruption is detected. * * Called with op->lock held. */ + + /* + * List manipulation code elsewhere will ignore ops that + * have been given up upon. 
+ */ op->op_state |= OP_VFS_STATE_GIVEN_UP; - /* from that point on it can't be moved by anybody else */ + if (list_empty(&op->list)) { /* caught copying to/from daemon */ BUG_ON(op_state_serviced(op)); @@ -259,12 +269,12 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s } /* - * sleeps on waitqueue waiting for matching downcall. - * if client-core finishes servicing, then we are good to go. + * Sleeps on waitqueue waiting for matching downcall. + * If client-core finishes servicing, then we are good to go. * else if client-core exits, we get woken up here, and retry with a timeout * - * Post when this call returns to the caller, the specified op will no - * longer be on any list or htable. + * When this call returns to the caller, the specified op will no + * longer be in either the in_progress hash table or on the request list. * * Returns 0 on success and -errno on failure * Errors are: @@ -281,6 +291,12 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, { long n; + /* + * There's a "schedule_timeout" inside of these wait + * primitives, during which the op is out of the hands of the + * user process that needs something done and is being + * manipulated by the client-core process. + */ if (interruptible) n = wait_for_completion_interruptible_timeout(&op->waitq, timeout); From 9f08cfe94417f782393330cbfc95617c04f051c2 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 26 Feb 2016 14:39:08 -0500 Subject: [PATCH 142/174] Orangefs: update orangefs.txt Al Viro has cleaned up the way ops are processed and waited for, now orangefs.txt has an overview of how it works. Several recent related commits have added to the comments in the code as well. 
Signed-off-by: Mike Marshall --- Documentation/filesystems/orangefs.txt | 69 +++++++++++++++++++++++--- fs/orangefs/devorangefs-req.c | 4 +- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt index 925a53e52097..e1a0056a365f 100644 --- a/Documentation/filesystems/orangefs.txt +++ b/Documentation/filesystems/orangefs.txt @@ -221,18 +221,71 @@ contains the "downcall" which expresses the results of the request. The slab allocator is used to keep a cache of op structures handy. -The life cycle of a typical op goes like this: +At init time the kernel module defines and initializes a request list +and an in_progress hash table to keep track of all the ops that are +in flight at any given time. - - obtain and initialize an op structure from the op_cache. +Ops are stateful: - - queue the op to the pvfs device so that its upcall data can be - read by userspace. + * unknown - op was just initialized + * waiting - op is on request_list (upward bound) + * inprogr - op is in progress (waiting for downcall) + * serviced - op has matching downcall; ok + * purged - op has to start a timer since client-core + exited uncleanly before servicing op + * given up - submitter has given up waiting for it - - wait for userspace to write downcall data back to the pvfs device. +When some arbitrary userspace program needs to perform a +filesystem operation on Orangefs (readdir, I/O, create, whatever) +an op structure is initialized and tagged with a distinguishing ID +number. The upcall part of the op is filled out, and the op is +passed to the "service_operation" function. - - consume the downcall and return the op struct to the op_cache. +Service_operation changes the op's state to "waiting", puts +it on the request list, and signals the Orangefs file_operations.poll +function through a wait queue. 
Userspace is polling the pseudo-device +and thus becomes aware of the upcall request that needs to be read. -Some ops are atypical with respect to their payloads: readdir and io ops. +When the Orangefs file_operations.read function is triggered, the +request list is searched for an op that seems ready-to-process. +The op is removed from the request list. The tag from the op and +the filled-out upcall struct are copy_to_user'ed back to userspace. + +If any of these (and some additional protocol) copy_to_users fail, +the op's state is set to "waiting" and the op is added back to +the request list. Otherwise, the op's state is changed to "in progress", +and the op is hashed on its tag and put onto the end of a list in the +in_progress hash table at the index the tag hashed to. + +When userspace has assembled the response to the upcall, it +writes the response, which includes the distinguishing tag, back to +the pseudo device in a series of io_vecs. This triggers the Orangefs +file_operations.write_iter function to find the op with the associated +tag and remove it from the in_progress hash table. As long as the op's +state is not "canceled" or "given up", its state is set to "serviced". +The file_operations.write_iter function returns to the waiting vfs, +and back to service_operation through wait_for_matching_downcall. + +Service operation returns to its caller with the op's downcall +part (the response to the upcall) filled out. + +The "client-core" is the bridge between the kernel module and +userspace. The client-core is a daemon. The client-core has an +associated watchdog daemon. If the client-core is ever signaled +to die, the watchdog daemon restarts the client-core. Even though +the client-core is restarted "right away", there is a period of +time during such an event that the client-core is dead. A dead client-core +can't be triggered by the Orangefs file_operations.poll function. 
+Ops that pass through service_operation during a "dead spell" can time out +on the wait queue and one attempt is made to recycle them. Obviously, +if the client-core stays dead too long, the arbitrary userspace processes +trying to use Orangefs will be negatively affected. Waiting ops +that can't be serviced will be removed from the request list and +have their states set to "given up". In-progress ops that can't +be serviced will be removed from the in_progress hash table and +have their states set to "given up". + +Readdir and I/O ops are atypical with respect to their payloads. - readdir ops use the smaller of the two pre-allocated pre-partitioned memory buffers. The readdir buffer is only available to userspace. @@ -311,7 +364,7 @@ particular response. jamb everything needed to represent a pvfs2_readdir_response_t into the readdir buffer descriptor specified in the upcall. -writev() on /dev/pvfs2-req is used to pass responses to the requests +Userspace uses writev() on /dev/pvfs2-req to pass responses to the requests made by the kernel side. A buffer_list containing: diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index e3934c06b96a..d50f89ea302e 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -404,8 +404,8 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, wakeup: /* - * tell the vfs op waiting on a waitqueue - * that this op is done + * Return to vfs waitqueue, and back to service_operation + * through wait_for_matching_downcall.
*/ spin_lock(&op->lock); if (unlikely(op_is_cancel(op))) { From 9d9e7ba9ee8f304c4608f3c81aa5e7fb3bddd251 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 3 Mar 2016 13:46:48 -0500 Subject: [PATCH 143/174] Orangefs: improve gossip statements Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 18 ++++++++++++++++++ fs/orangefs/file.c | 8 ++------ fs/orangefs/orangefs-mod.c | 11 ++++++----- fs/orangefs/waitqueue.c | 33 +++++++++++++++++++++++---------- 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index d50f89ea302e..0f9a12ac7458 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -245,6 +245,12 @@ restart: * it has been sent to the client. */ set_op_state_inprogress(cur_op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: 1 op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(cur_op), + cur_op->op_state, + current->comm); orangefs_devreq_add_op(cur_op); spin_unlock(&cur_op->lock); spin_unlock(&htable_ops_in_progress_lock); @@ -262,6 +268,12 @@ error: spin_lock(&cur_op->lock); if (likely(!op_state_given_up(cur_op))) { set_op_state_waiting(cur_op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: 2 op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(cur_op), + cur_op->op_state, + current->comm); list_add(&cur_op->list, &orangefs_request_list); spin_unlock(&cur_op->lock); } else { @@ -416,6 +428,12 @@ wakeup: complete(&op->waitq); } else { set_op_state_serviced(op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(op), + op->op_state, + current->comm); spin_unlock(&op->lock); } return ret; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 399d5288dc1a..6f2e0f745c5d 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -214,18 +214,14 @@ populate_shared_memory: goto out; } gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU): Amount written as returned by the sys-io 
call:%d\n", + "%s(%pU): Amount %s, returned by the sys-io call:%d\n", __func__, handle, + type == ORANGEFS_IO_READ ? "read" : "written", (int)new_op->downcall.resp.io.amt_complete); ret = new_op->downcall.resp.io.amt_complete; - /* - * tell the device file owner waiting on I/O that this read has - * completed and it can return now. - */ - out: if (buffer_index >= 0) { orangefs_bufmap_put(buffer_index); diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index a4e08dd3e669..91a4293d1cd7 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -261,12 +261,13 @@ void purge_inprogress_ops(void) next, &htable_ops_in_progress[i], list) { - gossip_debug(GOSSIP_INIT_DEBUG, - "pvfs2-client-core: purging in-progress op tag " - "%llu %s\n", - llu(op->tag), - get_opname_string(op)); set_op_state_purged(op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(op), + op->op_state, + current->comm); } spin_unlock(&htable_ops_in_progress_lock); } diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index edfd921cf6ec..31635bc303fe 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -37,6 +37,12 @@ void purge_waiting_ops(void) llu(op->tag), get_opname_string(op)); set_op_state_purged(op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(op), + op->op_state, + current->comm); } spin_unlock(&orangefs_request_list_lock); } @@ -101,6 +107,12 @@ retry_servicing: spin_lock(&orangefs_request_list_lock); spin_lock(&op->lock); set_op_state_waiting(op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(op), + op->op_state, + current->comm); /* add high priority remount op to the front of the line. 
*/ if (flags & ORANGEFS_OP_PRIORITY) list_add(&op->list, &orangefs_request_list); @@ -173,7 +185,8 @@ retry_servicing: out: gossip_debug(GOSSIP_WAIT_DEBUG, - "orangefs: service_operation %s returning: %d for %p.\n", + "%s: %s returning: %d for %p.\n", + __func__, op_name, ret, op); @@ -204,6 +217,12 @@ bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) } spin_lock(&op->lock); set_op_state_waiting(op); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op:%s: op_state:%d: process:%s:\n", + __func__, + get_opname_string(op), + op->op_state, + current->comm); list_add(&op->list, &orangefs_request_list); spin_unlock(&op->lock); spin_unlock(&orangefs_request_list_lock); @@ -310,9 +329,7 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, if (unlikely(n < 0)) { gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation interrupted by a signal (tag " - "%llu, op %p)\n", + "%s: operation interrupted, tag %llu, %p\n", __func__, llu(op->tag), op); @@ -320,9 +337,7 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, } if (op_state_purged(op)) { gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation purged (tag " - "%llu, %p, att %d)\n", + "%s: operation purged, tag %llu, %p, %d\n", __func__, llu(op->tag), op, @@ -333,9 +348,7 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op, } /* must have timed out, then... 
*/ gossip_debug(GOSSIP_WAIT_DEBUG, - "*** %s:" - " operation timed out (tag" - " %llu, %p, att %d)\n", + "%s: operation timed out, tag %llu, %p, %d)\n", __func__, llu(op->tag), op, From cf07c0bf88b7e8765361e808b61fef287caedfe3 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 9 Mar 2016 13:11:45 -0500 Subject: [PATCH 144/174] Orangefs: add a new gossip statement Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 43e3aeb529ea..259b667f6c8f 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -342,6 +342,10 @@ get_new_buffer_index: if (ctx->pos == ORANGEFS_ITERATE_NEXT) ctx->pos = 0; + gossip_debug(GOSSIP_DIR_DEBUG, + "%s: dirent_outcount:%d:\n", + __func__, + rhandle.readdir_response.orangefs_dirent_outcount); for (i = ctx->pos; i < rhandle.readdir_response.orangefs_dirent_outcount; i++) { From 162ada7764162eb2eb0a02546f820ca8b099cdea Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 9 Mar 2016 13:12:37 -0500 Subject: [PATCH 145/174] Orangefs: improve the POSIXness of interrupted writes... Don't return EINTR on interrupted writes if some data has already been written. Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 54 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 6f2e0f745c5d..9b561b7894b3 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -180,21 +180,57 @@ populate_shared_memory: } if (ret < 0) { - /* - * don't write an error to syslog on signaled operation - * termination unless we've got debugging turned on, as - * this can happen regularly (i.e. ctrl-c) - */ - if (ret == -EINTR) + if (ret == -EINTR) { + /* + * We can't return EINTR if any data was written, + * it's not POSIX. It is minimally acceptable + * to give a partial write, the way NFS does. 
+ * + * It would be optimal to return all or nothing, + * but if a userspace write is bigger than + * an IO buffer, and the interrupt occurs + * between buffer writes, that would not be + * possible. + */ + switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) { + /* + * If the op was waiting when the interrupt + * occurred, then the client-core did not + * trigger the write. + */ + case OP_VFS_STATE_WAITING: + if (*offset == 0) + ret = -EINTR; + else + ret = 0; + break; + /* + * If the op was in progress when the interrupt + * occurred, then the client-core was able to + * trigger the write. + */ + case OP_VFS_STATE_INPROGR: + ret = total_size; + break; + default: + gossip_err("%s: unexpected op state :%d:.\n", + __func__, + new_op->op_state); + ret = 0; + break; + } gossip_debug(GOSSIP_FILE_DEBUG, - "%s: returning error %ld\n", __func__, - (long)ret); - else + "%s: got EINTR, state:%d: %p\n", + __func__, + new_op->op_state, + new_op); + } else { gossip_err("%s: error in %s handle %pU, returning %zd\n", __func__, type == ORANGEFS_IO_READ ? "read from" : "write to", handle, ret); + } if (orangefs_cancel_op_in_progress(new_op)) return ret; From c62da5853de5564e367932185500f96ab70a6f7c Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 29 Feb 2016 16:07:35 -0500 Subject: [PATCH 146/174] orangefs: Avoid symlink upcall if target is too long. Previously the client-core detected this condition by sheer luck! Since we used strncpy, no NUL byte would be included on the name. The client-core would call strlen, which would read past the end of its buffer, but return a number large enough that the client-core would return ENAMETOOLONG. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/namei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 650ff299738b..5a60c508af4e 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -269,6 +269,9 @@ static int orangefs_symlink(struct inode *dir, if (!symname) return -EINVAL; + if (strlen(symname)+1 > ORANGEFS_NAME_MAX) + return -ENAMETOOLONG; + new_op = op_alloc(ORANGEFS_VFS_OP_SYMLINK); if (!new_op) return -ENOMEM; From acfcbaf1925f2dc5c46c61de69d756dec92a2ff8 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Sat, 5 Mar 2016 13:17:39 -0500 Subject: [PATCH 147/174] orangefs: make fs_mount_pending static Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 76 +++++++++++++++++------------------ fs/orangefs/orangefs-kernel.h | 1 - 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 0f9a12ac7458..12ea8730aa5d 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -74,6 +74,44 @@ static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag) return NULL; } +/* Returns whether any FS are still pending remounted */ +static int mark_all_pending_mounts(void) +{ + int unmounted = 1; + struct orangefs_sb_info_s *orangefs_sb = NULL; + + spin_lock(&orangefs_superblocks_lock); + list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { + /* All of these file system require a remount */ + orangefs_sb->mount_pending = 1; + unmounted = 0; + } + spin_unlock(&orangefs_superblocks_lock); + return unmounted; +} + +/* + * Determine if a given file system needs to be remounted or not + * Returns -1 on error + * 0 if already mounted + * 1 if needs remount + */ +static int fs_mount_pending(__s32 fsid) +{ + int mount_pending = -1; + struct orangefs_sb_info_s *orangefs_sb = NULL; + + spin_lock(&orangefs_superblocks_lock); + 
list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { + if (orangefs_sb->fs_id == fsid) { + mount_pending = orangefs_sb->mount_pending; + break; + } + } + spin_unlock(&orangefs_superblocks_lock); + return mount_pending; +} + static int orangefs_devreq_open(struct inode *inode, struct file *file) { int ret = -EINVAL; @@ -449,44 +487,6 @@ Enomem: goto wakeup; } -/* Returns whether any FS are still pending remounted */ -static int mark_all_pending_mounts(void) -{ - int unmounted = 1; - struct orangefs_sb_info_s *orangefs_sb = NULL; - - spin_lock(&orangefs_superblocks_lock); - list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { - /* All of these file system require a remount */ - orangefs_sb->mount_pending = 1; - unmounted = 0; - } - spin_unlock(&orangefs_superblocks_lock); - return unmounted; -} - -/* - * Determine if a given file system needs to be remounted or not - * Returns -1 on error - * 0 if already mounted - * 1 if needs remount - */ -int fs_mount_pending(__s32 fsid) -{ - int mount_pending = -1; - struct orangefs_sb_info_s *orangefs_sb = NULL; - - spin_lock(&orangefs_superblocks_lock); - list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { - if (orangefs_sb->fs_id == fsid) { - mount_pending = orangefs_sb->mount_pending; - break; - } - } - spin_unlock(&orangefs_superblocks_lock); - return mount_pending; -} - /* * NOTE: gets called when the last reference to this device is dropped. 
* Using the open_access_count variable, we enforce a reference count diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index c045c0b89507..045e493ab033 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -523,7 +523,6 @@ int orangefs_dev_init(void); void orangefs_dev_cleanup(void); int is_daemon_in_service(void); bool __is_daemon_in_service(void); -int fs_mount_pending(__s32 fsid); /* * defined in orangefs-utils.c From 53f57fef43f5b9586c7a78acdeae27e206eae48b Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 14 Mar 2016 15:28:34 -0400 Subject: [PATCH 148/174] Orangefs: Extra sanity insurance on buffer before using string functions on it. Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 12ea8730aa5d..35418d0b77bf 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -678,6 +678,19 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) ret = copy_from_user(&client_debug_array_string, (void __user *)arg, ORANGEFS_MAX_DEBUG_STRING_LEN); + /* + * The real client-core makes an effort to ensure + * that what we get here is an actual string that + * isn't too long to fit in this buffer. We're going to use + * string functions on the stuff we got, so we'll make + * this extra effort to try and keep from + * flowing out of this buffer when we use the string + * functions, even if somehow the stuff we end up + * with here is garbage.
+ */ + client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] = + '\0'; + if (ret != 0) { pr_info("%s: CLIENT_STRING: copy_from_user failed\n", __func__); From a7d3e78ab53ff479fee3ad5a674a74c54c337b3b Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 14 Mar 2016 15:30:03 -0400 Subject: [PATCH 149/174] Orangefs: follow_link -> get_link change Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 8 +++----- fs/orangefs/symlink.c | 15 +-------------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 4e923ece1e09..4a350ec2dbb3 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -268,17 +268,15 @@ int orangefs_getattr(struct vfsmount *mnt, "orangefs_getattr: called on %s\n", dentry->d_name.name); - /* - * Similar to the above comment, a getattr also expects that all - * fields/attributes of the inode would be refreshed. So again, we - * dont have too much of a choice but refresh all the attributes. - */ ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); if (ret == 0) { generic_fillattr(inode, kstat); + /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); kstat->blksize = orangefs_inode->blksize; + + inode->i_link = ORANGEFS_I(dentry->d_inode)->link_target; } else { /* assume an I/O error and flag inode as bad */ gossip_debug(GOSSIP_INODE_DEBUG, diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c index 2b8541a7fc43..6418dd638680 100644 --- a/fs/orangefs/symlink.c +++ b/fs/orangefs/symlink.c @@ -8,22 +8,9 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -static const char *orangefs_follow_link(struct dentry *dentry, void **cookie) -{ - char *target = ORANGEFS_I(dentry->d_inode)->link_target; - - gossip_debug(GOSSIP_INODE_DEBUG, - "%s: called on %s (target is %p)\n", - __func__, (char *)dentry->d_name.name, target); - - *cookie = target; - - return target; -} - struct inode_operations orangefs_symlink_inode_operations = { 
.readlink = generic_readlink, - .follow_link = orangefs_follow_link, + .get_link = simple_get_link, .setattr = orangefs_setattr, .getattr = orangefs_getattr, .listxattr = orangefs_listxattr, From 2180c52cc72993b3b097573aaa550f273f795c8a Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Mon, 14 Mar 2016 15:30:39 -0400 Subject: [PATCH 150/174] Orangefs: fix sloppy cleanups of debugfs and sysfs init failures. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 20 ++++---- fs/orangefs/orangefs-mod.c | 29 ++++++++--- fs/orangefs/orangefs-sysfs.c | 89 +++++++++++++++++----------------- 3 files changed, 76 insertions(+), 62 deletions(-) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 9eb7972ae10d..19670b8b4053 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -101,30 +101,33 @@ int orangefs_debugfs_init(void) int rc = -ENOMEM; debug_dir = debugfs_create_dir("orangefs", NULL); - if (!debug_dir) + if (!debug_dir) { + pr_info("%s: debugfs_create_dir failed.\n", __func__); goto out; + } help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, 0444, debug_dir, debug_help_string, &debug_help_fops); - if (!help_file_dentry) + if (!help_file_dentry) { + pr_info("%s: debugfs_create_file failed.\n", __func__); goto out; + } orangefs_debug_disabled = 0; rc = 0; out: - if (rc) - orangefs_debugfs_cleanup(); return rc; } void orangefs_debugfs_cleanup(void) { - debugfs_remove_recursive(debug_dir); + if (debug_dir) + debugfs_remove_recursive(debug_dir); } /* open ORANGEFS_KMOD_DEBUG_HELP_FILE */ @@ -198,7 +201,6 @@ static int help_show(struct seq_file *m, void *v) */ int orangefs_kernel_debug_init(void) { - int rc = -ENOMEM; struct dentry *ret; char *k_buffer = NULL; @@ -232,8 +234,6 @@ int orangefs_kernel_debug_init(void) rc = 0; out: - if (rc) - orangefs_debugfs_cleanup(); gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); return rc; @@ -268,7 +268,7 @@ int 
orangefs_client_debug_init(void) c_buffer, &kernel_debug_fops); if (!client_debug_dentry) { - pr_info("%s: failed to create %s.\n", + pr_info("%s: failed to create updated %s.\n", __func__, ORANGEFS_CLIENT_DEBUG_FILE); goto out; @@ -277,8 +277,6 @@ int orangefs_client_debug_init(void) rc = 0; out: - if (rc) - orangefs_debugfs_cleanup(); gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); return rc; diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 91a4293d1cd7..abc41fa2d2c4 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -185,22 +185,39 @@ static int __init orangefs_init(void) */ ret = orangefs_prepare_debugfs_help_string(1); if (ret) - goto out; + goto prepare_helpstring_failed; - orangefs_debugfs_init(); - orangefs_kernel_debug_init(); - orangefs_sysfs_init(); + ret = orangefs_debugfs_init(); + if (ret) + goto debugfs_init_failed; + + ret = orangefs_kernel_debug_init(); + if (ret) + goto kernel_debug_init_failed; + + ret = orangefs_sysfs_init(); + if (ret) + goto sysfs_init_failed; ret = register_filesystem(&orangefs_fs_type); if (ret == 0) { pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION); - return 0; + ret = 0; + goto out; } - orangefs_debugfs_cleanup(); orangefs_sysfs_exit(); fsid_key_table_finalize(); +sysfs_init_failed: + +kernel_debug_init_failed: + +debugfs_init_failed: + orangefs_debugfs_cleanup(); + +prepare_helpstring_failed: + cleanup_progress_table: kfree(htable_ops_in_progress); diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 83f4053bd11b..5c03113e3ad2 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -1611,27 +1611,22 @@ static struct stats_orangefs_obj *stats_orangefs_obj; int orangefs_sysfs_init(void) { - int rc; + int rc = -EINVAL; gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_init: start\n"); /* create /sys/fs/orangefs. 
*/ orangefs_obj = kzalloc(sizeof(*orangefs_obj), GFP_KERNEL); - if (!orangefs_obj) { - rc = -EINVAL; + if (!orangefs_obj) goto out; - } rc = kobject_init_and_add(&orangefs_obj->kobj, &orangefs_ktype, fs_kobj, ORANGEFS_KOBJ_ID); - if (rc) { - kobject_put(&orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto ofs_obj_bail; kobject_uevent(&orangefs_obj->kobj, KOBJ_ADD); @@ -1639,7 +1634,7 @@ int orangefs_sysfs_init(void) acache_orangefs_obj = kzalloc(sizeof(*acache_orangefs_obj), GFP_KERNEL); if (!acache_orangefs_obj) { rc = -EINVAL; - goto out; + goto ofs_obj_bail; } rc = kobject_init_and_add(&acache_orangefs_obj->kobj, @@ -1647,11 +1642,8 @@ int orangefs_sysfs_init(void) &orangefs_obj->kobj, ACACHE_KOBJ_ID); - if (rc) { - kobject_put(&acache_orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto acache_obj_bail; kobject_uevent(&acache_orangefs_obj->kobj, KOBJ_ADD); @@ -1660,18 +1652,15 @@ int orangefs_sysfs_init(void) kzalloc(sizeof(*capcache_orangefs_obj), GFP_KERNEL); if (!capcache_orangefs_obj) { rc = -EINVAL; - goto out; + goto acache_obj_bail; } rc = kobject_init_and_add(&capcache_orangefs_obj->kobj, &capcache_orangefs_ktype, &orangefs_obj->kobj, CAPCACHE_KOBJ_ID); - if (rc) { - kobject_put(&capcache_orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto capcache_obj_bail; kobject_uevent(&capcache_orangefs_obj->kobj, KOBJ_ADD); @@ -1680,18 +1669,15 @@ int orangefs_sysfs_init(void) kzalloc(sizeof(*ccache_orangefs_obj), GFP_KERNEL); if (!ccache_orangefs_obj) { rc = -EINVAL; - goto out; + goto capcache_obj_bail; } rc = kobject_init_and_add(&ccache_orangefs_obj->kobj, &ccache_orangefs_ktype, &orangefs_obj->kobj, CCACHE_KOBJ_ID); - if (rc) { - kobject_put(&ccache_orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto ccache_obj_bail; kobject_uevent(&ccache_orangefs_obj->kobj, KOBJ_ADD); @@ -1699,7 +1685,7 @@ int orangefs_sysfs_init(void) ncache_orangefs_obj = kzalloc(sizeof(*ncache_orangefs_obj), 
GFP_KERNEL); if (!ncache_orangefs_obj) { rc = -EINVAL; - goto out; + goto ccache_obj_bail; } rc = kobject_init_and_add(&ncache_orangefs_obj->kobj, @@ -1707,11 +1693,8 @@ int orangefs_sysfs_init(void) &orangefs_obj->kobj, NCACHE_KOBJ_ID); - if (rc) { - kobject_put(&ncache_orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto ncache_obj_bail; kobject_uevent(&ncache_orangefs_obj->kobj, KOBJ_ADD); @@ -1719,7 +1702,7 @@ int orangefs_sysfs_init(void) pc_orangefs_obj = kzalloc(sizeof(*pc_orangefs_obj), GFP_KERNEL); if (!pc_orangefs_obj) { rc = -EINVAL; - goto out; + goto ncache_obj_bail; } rc = kobject_init_and_add(&pc_orangefs_obj->kobj, @@ -1727,11 +1710,8 @@ int orangefs_sysfs_init(void) &orangefs_obj->kobj, "perf_counters"); - if (rc) { - kobject_put(&pc_orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto pc_obj_bail; kobject_uevent(&pc_orangefs_obj->kobj, KOBJ_ADD); @@ -1739,7 +1719,7 @@ int orangefs_sysfs_init(void) stats_orangefs_obj = kzalloc(sizeof(*stats_orangefs_obj), GFP_KERNEL); if (!stats_orangefs_obj) { rc = -EINVAL; - goto out; + goto pc_obj_bail; } rc = kobject_init_and_add(&stats_orangefs_obj->kobj, @@ -1747,13 +1727,32 @@ int orangefs_sysfs_init(void) &orangefs_obj->kobj, STATS_KOBJ_ID); - if (rc) { - kobject_put(&stats_orangefs_obj->kobj); - rc = -EINVAL; - goto out; - } + if (rc) + goto stats_obj_bail; kobject_uevent(&stats_orangefs_obj->kobj, KOBJ_ADD); + goto out; + +stats_obj_bail: + kobject_put(&stats_orangefs_obj->kobj); + +pc_obj_bail: + kobject_put(&pc_orangefs_obj->kobj); + +ncache_obj_bail: + kobject_put(&ncache_orangefs_obj->kobj); + +ccache_obj_bail: + kobject_put(&ccache_orangefs_obj->kobj); + +capcache_obj_bail: + kobject_put(&capcache_orangefs_obj->kobj); + +acache_obj_bail: + kobject_put(&acache_orangefs_obj->kobj); + +ofs_obj_bail: + kobject_put(&orangefs_obj->kobj); out: return rc; } From 1a0ce16d713679be86a023f7bd0f9c54f2c07e8a Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Thu, 17 Mar 2016 
13:24:34 -0400 Subject: [PATCH 151/174] Orangefs: adjust unwind on module init failure. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-mod.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index abc41fa2d2c4..76a1b9765e23 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -181,11 +181,10 @@ static int __init orangefs_init(void) * and passes along the needed info. The argument signifies * which time orangefs_prepare_debugfs_help_string is being * called. - * */ ret = orangefs_prepare_debugfs_help_string(1); if (ret) - goto prepare_helpstring_failed; + goto cleanup_key_table; ret = orangefs_debugfs_init(); if (ret) @@ -207,7 +206,6 @@ static int __init orangefs_init(void) } orangefs_sysfs_exit(); - fsid_key_table_finalize(); sysfs_init_failed: @@ -216,7 +214,8 @@ kernel_debug_init_failed: debugfs_init_failed: orangefs_debugfs_cleanup(); -prepare_helpstring_failed: +cleanup_key_table: + fsid_key_table_finalize(); cleanup_progress_table: kfree(htable_ops_in_progress); From 5e06664f29c92c8e6b007cdec1f3abf197bc1961 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 16 Mar 2016 13:54:48 -0400 Subject: [PATCH 152/174] orangefs: remove unused reference to xattr key length Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/xattr.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 8e9ccf971486..75a7dde8cc5f 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -344,7 +344,6 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size) __u64 token = ORANGEFS_ITERATE_START; ssize_t ret = -ENOMEM; ssize_t total = 0; - ssize_t length = 0; int count_keys = 0; int key_size; int i = 0; @@ -389,10 +388,6 @@ try_again: goto done; } - length = new_op->downcall.resp.listxattr.keylen; - if (length == 0) - goto done; - returned_count = 
new_op->downcall.resp.listxattr.returned_count; if (returned_count < 0 || returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) { From 02a5cc537dfa222583b6b6c17451a67816fce9f5 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 16 Mar 2016 14:01:43 -0400 Subject: [PATCH 153/174] orangefs: sanitize listxattr and return EIO on impossible values Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/xattr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 75a7dde8cc5f..ef5da7538cd5 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -394,6 +394,7 @@ try_again: gossip_err("%s: impossible value for returned_count:%d:\n", __func__, returned_count); + ret = -EIO; goto done; } @@ -401,6 +402,15 @@ try_again: * Check to see how much can be fit in the buffer. Fit only whole keys. */ for (i = 0; i < returned_count; i++) { + if (new_op->downcall.resp.listxattr.lengths[i] < 0 || + new_op->downcall.resp.listxattr.lengths[i] > + ORANGEFS_MAX_XATTR_NAMELEN) { + gossip_err("%s: impossible value for lengths[%d]\n", + __func__, + new_op->downcall.resp.listxattr.lengths[i]); + ret = -EIO; + goto done; + } if (total + new_op->downcall.resp.listxattr.lengths[i] > size) goto done; From a4c680a027f52e179175fe451c69a55aba748efd Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 16 Mar 2016 14:35:21 -0400 Subject: [PATCH 154/174] orangefs: remove paranoia in orangefs_set_inode Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 4a350ec2dbb3..a45625240b17 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -365,16 +365,8 @@ static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref) static int orangefs_set_inode(struct inode *inode, void *data) { struct orangefs_object_kref *ref = (struct 
orangefs_object_kref *) data; - struct orangefs_inode_s *orangefs_inode = NULL; - - /* Make sure that we have sane parameters */ - if (!data || !inode) - return 0; - orangefs_inode = ORANGEFS_I(inode); - if (!orangefs_inode) - return 0; - orangefs_inode->refn.fs_id = ref->fs_id; - orangefs_inode->refn.khandle = ref->khandle; + ORANGEFS_I(inode)->refn.fs_id = ref->fs_id; + ORANGEFS_I(inode)->refn.khandle = ref->khandle; return 0; } From 2f83ace37181e445cab83d1d2a3a1dc88a36a814 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 17 Mar 2016 13:20:35 -0400 Subject: [PATCH 155/174] orangefs: put register_chrdev immediately before register_filesystem Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-mod.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 76a1b9765e23..6f072a8c0de1 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -144,21 +144,12 @@ static int __init orangefs_init(void) if (ret < 0) goto cleanup_op; - /* Initialize the orangefsdev subsystem. */ - ret = orangefs_dev_init(); - if (ret < 0) { - gossip_err("%s: could not initialize device subsystem %d!\n", - __func__, - ret); - goto cleanup_inode; - } - htable_ops_in_progress = kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL); if (!htable_ops_in_progress) { gossip_err("Failed to initialize op hashtable"); ret = -ENOMEM; - goto cleanup_device; + goto cleanup_inode; } /* initialize a doubly linked at each hash table index */ @@ -198,6 +189,15 @@ static int __init orangefs_init(void) if (ret) goto sysfs_init_failed; + /* Initialize the orangefsdev subsystem. 
*/ + ret = orangefs_dev_init(); + if (ret < 0) { + gossip_err("%s: could not initialize device subsystem %d!\n", + __func__, + ret); + goto cleanup_device; + } + ret = register_filesystem(&orangefs_fs_type); if (ret == 0) { pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION); @@ -207,6 +207,9 @@ static int __init orangefs_init(void) orangefs_sysfs_exit(); +cleanup_device: + orangefs_dev_cleanup(); + sysfs_init_failed: kernel_debug_init_failed: @@ -220,9 +223,6 @@ cleanup_key_table: cleanup_progress_table: kfree(htable_ops_in_progress); -cleanup_device: - orangefs_dev_cleanup(); - cleanup_inode: orangefs_inode_cache_finalize(); From d57521a653e7ac2e8edaabfff202caf87c61f08a Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 14 Mar 2016 16:59:38 -0400 Subject: [PATCH 156/174] orangefs: remove inode->i_lock wrapper Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 3 --- fs/orangefs/orangefs-utils.c | 8 ++++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 045e493ab033..784629870717 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -632,9 +632,6 @@ do { \ spin_unlock(&orangefs_superblocks_lock); \ } while (0) -#define orangefs_lock_inode(inode) spin_lock(&inode->i_lock) -#define orangefs_unlock_inode(inode) spin_unlock(&inode->i_lock) - #define fill_default_sys_attrs(sys_attr, type, mode) \ do { \ sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \ diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 8ef9e9646748..df4ad04407e2 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -170,11 +170,11 @@ static int copy_attributes_to_inode(struct inode *inode, rounded_up_size = (inode_size + (4096 - (inode_size % 4096))); - orangefs_lock_inode(inode); + spin_lock(&inode->i_lock); inode->i_bytes = inode_size; inode->i_blocks 
= (unsigned long)(rounded_up_size / 512); - orangefs_unlock_inode(inode); + spin_unlock(&inode->i_lock); /* * NOTE: make sure all the places we're called @@ -194,9 +194,9 @@ static int copy_attributes_to_inode(struct inode *inode, default: inode->i_size = PAGE_CACHE_SIZE; - orangefs_lock_inode(inode); + spin_lock(&inode->i_lock); inode_set_bytes(inode, inode->i_size); - orangefs_unlock_inode(inode); + spin_unlock(&inode->i_lock); break; } From 3c9cf98d7b4f27e4303ea6e67db7f0c343a575b6 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Mar 2016 11:28:20 -0400 Subject: [PATCH 157/174] orangefs: rename orangefs_inode_getattr to orangefs_inode_old_getattr This is motivated by orangefs_inode_old_getattr's habit of writing over live inodes. Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 2 +- fs/orangefs/file.c | 7 +- fs/orangefs/inode.c | 9 +-- fs/orangefs/orangefs-kernel.h | 4 +- fs/orangefs/orangefs-utils.c | 121 +++++++++++++++++++++++++++++++++- 5 files changed, 133 insertions(+), 10 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 2de92b71d861..cc5487a1d040 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -117,7 +117,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) /* Now we must perform a getattr to validate the inode contents. */ - ret = orangefs_inode_getattr(dentry->d_inode, + ret = orangefs_inode_old_getattr(dentry->d_inode, ORANGEFS_ATTR_SYS_TYPE|ORANGEFS_ATTR_SYS_LNK_TARGET, 1); if (ret < 0) { gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n", diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 9b561b7894b3..3aff671534d0 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -455,10 +455,10 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite /* Make sure generic_write_checks sees an up to date inode size. 
*/ if (file->f_flags & O_APPEND) { - rc = orangefs_inode_getattr(file->f_mapping->host, + rc = orangefs_inode_old_getattr(file->f_mapping->host, ORANGEFS_ATTR_SYS_SIZE, 0); if (rc) { - gossip_err("%s: orangefs_inode_getattr failed, rc:%zd:.\n", + gossip_err("%s: orangefs_inode_old_getattr failed, rc:%zd:.\n", __func__, rc); goto out; } @@ -670,7 +670,8 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) * NOTE: We are only interested in file size here, * so we set mask accordingly. */ - ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE, 0); + ret = orangefs_inode_old_getattr(inode, + ORANGEFS_ATTR_SYS_SIZE, 0); if (ret) { gossip_debug(GOSSIP_FILE_DEBUG, "%s:%s:%d calling make bad inode\n", diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index a45625240b17..fd591d44a97d 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -268,7 +268,8 @@ int orangefs_getattr(struct vfsmount *mnt, "orangefs_getattr: called on %s\n", dentry->d_name.name); - ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0); + ret = orangefs_inode_old_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, + 0); if (ret == 0) { generic_fillattr(inode, kstat); @@ -299,7 +300,7 @@ int orangefs_permission(struct inode *inode, int mask) gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); /* Make sure the permission (and other common attrs) are up to date. 
*/ - ret = orangefs_inode_getattr(inode, + ret = orangefs_inode_old_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); if (ret < 0) return ret; @@ -401,7 +402,7 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref if (!inode || !(inode->i_state & I_NEW)) return inode; - error = orangefs_inode_getattr(inode, + error = orangefs_inode_old_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); if (error) { iget_failed(inode); @@ -447,7 +448,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ - error = orangefs_inode_getattr(inode, + error = orangefs_inode_old_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); if (error) goto out_iput; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 784629870717..276685cdf38d 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -544,7 +544,9 @@ int orangefs_inode_setxattr(struct inode *inode, size_t size, int flags); -int orangefs_inode_getattr(struct inode *inode, __u32 mask, int check); +int orangefs_inode_old_getattr(struct inode *inode, __u32 mask, int check); + +int orangefs_inode_getattr(struct inode *inode, int new, int size); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index df4ad04407e2..59c51e2c5a71 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -458,7 +458,8 @@ static int compare_attributes_to_inode(struct inode *inode, * otherwise. When check is 1, returns 1 on success where the inode is valid * and 0 on success where the inode is stale and -errno otherwise. 
*/ -int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask, int check) +int orangefs_inode_old_getattr(struct inode *inode, __u32 getattr_mask, + int check) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; @@ -536,6 +537,124 @@ out: return ret; } +static int orangefs_inode_type(enum orangefs_ds_type objtype) +{ + if (objtype == ORANGEFS_TYPE_METAFILE) + return S_IFREG; + else if (objtype == ORANGEFS_TYPE_DIRECTORY) + return S_IFDIR; + else if (objtype == ORANGEFS_TYPE_SYMLINK) + return S_IFLNK; + else + return -1; +} + +int orangefs_inode_getattr(struct inode *inode, int new, int size) +{ + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; + loff_t inode_size, rounded_up_size; + int ret; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, + get_khandle_from_ino(inode)); + + new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); + if (!new_op) + return -ENOMEM; + new_op->upcall.req.getattr.refn = orangefs_inode->refn; + new_op->upcall.req.getattr.mask = size ? + ORANGEFS_ATTR_SYS_ALL_NOHINT : ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE; + + ret = service_operation(new_op, __func__, + get_interruptible_flag(inode)); + if (ret != 0) + goto out; + + ret = orangefs_inode_type(new_op-> + downcall.resp.getattr.attributes.objtype); + if (!new) { + /* + * If the inode type or symlink target have changed then this + * inode is stale. 
+ */ + if (ret == -1 || !(inode->i_mode & ret)) { + orangefs_make_bad_inode(inode); + ret = -ESTALE; + goto out; + } + if (ret == S_IFLNK && strncmp(orangefs_inode->link_target, + new_op->downcall.resp.getattr.link_target, + ORANGEFS_NAME_MAX)) { + orangefs_make_bad_inode(inode); + ret = -ESTALE; + goto out; + } + } + + switch (ret) { + case S_IFREG: + inode->i_flags = orangefs_inode_flags(&new_op-> + downcall.resp.getattr.attributes); + if (size) { + inode_size = (loff_t)new_op-> + downcall.resp.getattr.attributes.size; + rounded_up_size = + (inode_size + (4096 - (inode_size % 4096))); + inode->i_size = inode_size; + orangefs_inode->blksize = + new_op->downcall.resp.getattr.attributes.blksize; + spin_lock(&inode->i_lock); + inode->i_bytes = inode_size; + inode->i_blocks = + (unsigned long)(rounded_up_size / 512); + spin_unlock(&inode->i_lock); + } + break; + case S_IFDIR: + inode->i_size = PAGE_CACHE_SIZE; + orangefs_inode->blksize = (1 << inode->i_blkbits); + spin_lock(&inode->i_lock); + inode_set_bytes(inode, inode->i_size); + spin_unlock(&inode->i_lock); + set_nlink(inode, 1); + break; + case S_IFLNK: + if (new) { + inode->i_size = (loff_t)strlen(new_op-> + downcall.resp.getattr.link_target); + orangefs_inode->blksize = (1 << inode->i_blkbits); + strlcpy(orangefs_inode->link_target, + new_op->downcall.resp.getattr.link_target, + ORANGEFS_NAME_MAX); + } + break; + } + + inode->i_uid = make_kuid(&init_user_ns, new_op-> + downcall.resp.getattr.attributes.owner); + inode->i_gid = make_kgid(&init_user_ns, new_op-> + downcall.resp.getattr.attributes.group); + inode->i_atime.tv_sec = (time64_t)new_op-> + downcall.resp.getattr.attributes.atime; + inode->i_mtime.tv_sec = (time64_t)new_op-> + downcall.resp.getattr.attributes.mtime; + inode->i_ctime.tv_sec = (time64_t)new_op-> + downcall.resp.getattr.attributes.ctime; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_nsec = 0; + + /* special case: mark the root inode as sticky */ + 
inode->i_mode = ret | (is_root_handle(inode) ? S_ISVTX : 0) | + orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes); + + ret = 0; +out: + op_release(new_op); + return ret; +} + /* * issues a orangefs setattr request to make sure the new attribute values * take effect if successful. returns 0 on success; -errno otherwise From 075cca50b6d3ba70ee08cc14535e1c0ba073f871 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Mar 2016 11:36:18 -0400 Subject: [PATCH 158/174] orangefs: use new orangefs_inode_getattr to create new inodes Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index fd591d44a97d..45d5846301c4 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -402,8 +402,7 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref if (!inode || !(inode->i_state & I_NEW)) return inode; - error = orangefs_inode_old_getattr(inode, - ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); + error = orangefs_inode_getattr(inode, 1, 0); if (error) { iget_failed(inode); return ERR_PTR(error); @@ -448,8 +447,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ - error = orangefs_inode_old_getattr(inode, - ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); + error = orangefs_inode_getattr(inode, 1, 0); if (error) goto out_iput; From e2f7f0d798497f7e2f9296f706ff3263ede0b044 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Mar 2016 12:33:20 -0400 Subject: [PATCH 159/174] orangefs: use new orangefs_inode_getattr to get size in write and llseek Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 3aff671534d0..d4a00ad26f6e 
100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -455,11 +455,12 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite /* Make sure generic_write_checks sees an up to date inode size. */ if (file->f_flags & O_APPEND) { - rc = orangefs_inode_old_getattr(file->f_mapping->host, - ORANGEFS_ATTR_SYS_SIZE, 0); + rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1); + if (rc == -ESTALE) + rc = -EIO; if (rc) { - gossip_err("%s: orangefs_inode_old_getattr failed, rc:%zd:.\n", - __func__, rc); + gossip_err("%s: orangefs_inode_getattr failed, " + "rc:%zd:.\n", __func__, rc); goto out; } } @@ -670,8 +671,9 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) * NOTE: We are only interested in file size here, * so we set mask accordingly. */ - ret = orangefs_inode_old_getattr(inode, - ORANGEFS_ATTR_SYS_SIZE, 0); + ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1); + if (ret == -ESTALE) + ret = -EIO; if (ret) { gossip_debug(GOSSIP_FILE_DEBUG, "%s:%s:%d calling make bad inode\n", From 8f24928d195fc32a6ba53c2329c5bbcaa59119bc Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Tue, 15 Mar 2016 12:36:29 -0400 Subject: [PATCH 160/174] orangefs: use new getattr in inode getattr and permission Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 45d5846301c4..8f047722cb44 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -268,8 +268,7 @@ int orangefs_getattr(struct vfsmount *mnt, "orangefs_getattr: called on %s\n", dentry->d_name.name); - ret = orangefs_inode_old_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, - 0); + ret = orangefs_inode_getattr(inode, 0, 1); if (ret == 0) { generic_fillattr(inode, kstat); @@ -278,14 +277,6 @@ int orangefs_getattr(struct vfsmount *mnt, kstat->blksize = orangefs_inode->blksize; inode->i_link = 
ORANGEFS_I(dentry->d_inode)->link_target; - } else { - /* assume an I/O error and flag inode as bad */ - gossip_debug(GOSSIP_INODE_DEBUG, - "%s:%s:%d calling make bad inode\n", - __FILE__, - __func__, - __LINE__); - orangefs_make_bad_inode(inode); } return ret; } @@ -300,8 +291,7 @@ int orangefs_permission(struct inode *inode, int mask) gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); /* Make sure the permission (and other common attrs) are up to date. */ - ret = orangefs_inode_old_getattr(inode, - ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0); + ret = orangefs_inode_getattr(inode, 0, 0); if (ret < 0) return ret; From 5859d77e56e470ba16960befcd948c97a6f62102 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 17 Mar 2016 15:15:16 -0400 Subject: [PATCH 161/174] orangefs: use new getattr for revalidate and remove old getattr Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 3 +- fs/orangefs/orangefs-kernel.h | 4 +- fs/orangefs/orangefs-utils.c | 367 +++++----------------------------- 3 files changed, 49 insertions(+), 325 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index cc5487a1d040..5dfc4f3cfe68 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -117,8 +117,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) /* Now we must perform a getattr to validate the inode contents. 
*/ - ret = orangefs_inode_old_getattr(dentry->d_inode, - ORANGEFS_ATTR_SYS_TYPE|ORANGEFS_ATTR_SYS_LNK_TARGET, 1); + ret = orangefs_inode_check_changed(dentry->d_inode); if (ret < 0) { gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n", __FILE__, __func__, __LINE__); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 276685cdf38d..5e85b199dee2 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -544,10 +544,10 @@ int orangefs_inode_setxattr(struct inode *inode, size_t size, int flags); -int orangefs_inode_old_getattr(struct inode *inode, __u32 mask, int check); - int orangefs_inode_getattr(struct inode *inode, int new, int size); +int orangefs_inode_check_changed(struct inode *inode); + int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); void orangefs_make_bad_inode(struct inode *inode); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 59c51e2c5a71..6643a6a87fa1 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -129,141 +129,6 @@ static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) return perm_mode; } -/* NOTE: symname is ignored unless the inode is a sym link */ -static int copy_attributes_to_inode(struct inode *inode, - struct ORANGEFS_sys_attr_s *attrs, - char *symname) -{ - int ret = -1; - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); - loff_t inode_size = 0; - loff_t rounded_up_size = 0; - - - /* - * arbitrarily set the inode block size; FIXME: we need to - * resolve the difference between the reported inode blocksize - * and the PAGE_CACHE_SIZE, since our block count will always - * be wrong. - * - * For now, we're setting the block count to be the proper - * number assuming the block size is 512 bytes, and the size is - * rounded up to the nearest 4K. This is apparently required - * to get proper size reports from the 'du' shell utility. 
- * - * changing the inode->i_blkbits to something other than - * PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that. - */ - gossip_debug(GOSSIP_UTILS_DEBUG, - "attrs->mask = %x (objtype = %s)\n", - attrs->mask, - attrs->objtype == ORANGEFS_TYPE_METAFILE ? "file" : - attrs->objtype == ORANGEFS_TYPE_DIRECTORY ? "directory" : - attrs->objtype == ORANGEFS_TYPE_SYMLINK ? "symlink" : - "invalid/unknown"); - - switch (attrs->objtype) { - case ORANGEFS_TYPE_METAFILE: - inode->i_flags = orangefs_inode_flags(attrs); - if (attrs->mask & ORANGEFS_ATTR_SYS_SIZE) { - inode_size = (loff_t) attrs->size; - rounded_up_size = - (inode_size + (4096 - (inode_size % 4096))); - - spin_lock(&inode->i_lock); - inode->i_bytes = inode_size; - inode->i_blocks = - (unsigned long)(rounded_up_size / 512); - spin_unlock(&inode->i_lock); - - /* - * NOTE: make sure all the places we're called - * from have the inode->i_sem lock. We're fine - * in 99% of the cases since we're mostly - * called from a lookup. - */ - inode->i_size = inode_size; - } - break; - case ORANGEFS_TYPE_SYMLINK: - if (symname != NULL) { - inode->i_size = (loff_t) strlen(symname); - break; - } - /*FALLTHRU*/ - default: - inode->i_size = PAGE_CACHE_SIZE; - - spin_lock(&inode->i_lock); - inode_set_bytes(inode, inode->i_size); - spin_unlock(&inode->i_lock); - break; - } - - inode->i_uid = make_kuid(&init_user_ns, attrs->owner); - inode->i_gid = make_kgid(&init_user_ns, attrs->group); - inode->i_atime.tv_sec = (time64_t) attrs->atime; - inode->i_mtime.tv_sec = (time64_t) attrs->mtime; - inode->i_ctime.tv_sec = (time64_t) attrs->ctime; - inode->i_atime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; - - inode->i_mode = orangefs_inode_perms(attrs); - - if (is_root_handle(inode)) { - /* special case: mark the root inode as sticky */ - inode->i_mode |= S_ISVTX; - gossip_debug(GOSSIP_UTILS_DEBUG, - "Marking inode %pU as sticky\n", - get_khandle_from_ino(inode)); - } - - switch (attrs->objtype) { - case 
ORANGEFS_TYPE_METAFILE: - inode->i_mode |= S_IFREG; - ret = 0; - break; - case ORANGEFS_TYPE_DIRECTORY: - inode->i_mode |= S_IFDIR; - /* NOTE: we have no good way to keep nlink consistent - * for directories across clients; keep constant at 1. - * Why 1? If we go with 2, then find(1) gets confused - * and won't work properly withouth the -noleaf option - */ - set_nlink(inode, 1); - ret = 0; - break; - case ORANGEFS_TYPE_SYMLINK: - inode->i_mode |= S_IFLNK; - - /* copy link target to inode private data */ - if (orangefs_inode && symname) { - strncpy(orangefs_inode->link_target, - symname, - ORANGEFS_NAME_MAX); - gossip_debug(GOSSIP_UTILS_DEBUG, - "Copied attr link target %s\n", - orangefs_inode->link_target); - } - gossip_debug(GOSSIP_UTILS_DEBUG, - "symlink mode %o\n", - inode->i_mode); - ret = 0; - break; - default: - gossip_err("orangefs: copy_attributes_to_inode: got invalid attribute type %x\n", - attrs->objtype); - } - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n", - inode->i_mode, - (unsigned long)i_size_read(inode)); - - return ret; -} - /* * NOTE: in kernel land, we never use the sys_attr->link_target for * anything, so don't bother copying it into the sys_attr object here. @@ -351,192 +216,6 @@ static inline int copy_attributes_from_inode(struct inode *inode, return 0; } -static int compare_attributes_to_inode(struct inode *inode, - struct ORANGEFS_sys_attr_s *attrs, - char *symname, - int mask) -{ - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); - loff_t inode_size, rounded_up_size; - - /* Much of what happens below relies on the type being around. */ - if (!(mask & ORANGEFS_ATTR_SYS_TYPE)) - return 0; - - if (attrs->objtype == ORANGEFS_TYPE_METAFILE && - inode->i_flags != orangefs_inode_flags(attrs)) - return 0; - - /* Compare file size. 
*/ - - switch (attrs->objtype) { - case ORANGEFS_TYPE_METAFILE: - if (mask & ORANGEFS_ATTR_SYS_SIZE) { - inode_size = attrs->size; - rounded_up_size = inode_size + - (4096 - (inode_size % 4096)); - if (inode->i_bytes != inode_size || - inode->i_blocks != rounded_up_size/512) - return 0; - } - break; - case ORANGEFS_TYPE_SYMLINK: - if (mask & ORANGEFS_ATTR_SYS_SIZE) - if (symname && strlen(symname) != inode->i_size) - return 0; - break; - default: - if (inode->i_size != PAGE_CACHE_SIZE && - inode_get_bytes(inode) != PAGE_CACHE_SIZE) - return 0; - } - - /* Compare general attributes. */ - - if (mask & ORANGEFS_ATTR_SYS_UID && - !uid_eq(inode->i_uid, make_kuid(&init_user_ns, attrs->owner))) - return 0; - if (mask & ORANGEFS_ATTR_SYS_GID && - !gid_eq(inode->i_gid, make_kgid(&init_user_ns, attrs->group))) - return 0; - if (mask & ORANGEFS_ATTR_SYS_ATIME && - inode->i_atime.tv_sec != attrs->atime) - return 0; - if (mask & ORANGEFS_ATTR_SYS_MTIME && - inode->i_atime.tv_sec != attrs->mtime) - return 0; - if (mask & ORANGEFS_ATTR_SYS_CTIME && - inode->i_atime.tv_sec != attrs->ctime) - return 0; - if (inode->i_atime.tv_nsec != 0 || - inode->i_mtime.tv_nsec != 0 || - inode->i_ctime.tv_nsec != 0) - return 0; - - if (mask & ORANGEFS_ATTR_SYS_PERM && - (inode->i_mode & ~(S_ISVTX|S_IFREG|S_IFDIR|S_IFLNK)) != - orangefs_inode_perms(attrs)) - return 0; - - if (is_root_handle(inode)) - if (!(inode->i_mode & S_ISVTX)) - return 0; - - /* Compare file type. 
*/ - - switch (attrs->objtype) { - case ORANGEFS_TYPE_METAFILE: - if (!S_ISREG(inode->i_mode)) - return 0; - break; - case ORANGEFS_TYPE_DIRECTORY: - if (!S_ISDIR(inode->i_mode)) - return 0; - if (inode->i_nlink != 1) - return 0; - break; - case ORANGEFS_TYPE_SYMLINK: - if (!S_ISLNK(inode->i_mode)) - return 0; - if (orangefs_inode && symname && - mask & ORANGEFS_ATTR_SYS_LNK_TARGET) - if (strcmp(orangefs_inode->link_target, symname)) - return 0; - break; - default: - gossip_err("orangefs: compare_attributes_to_inode: got invalid attribute type %x\n", - attrs->objtype); - - } - - return 1; -} - -/* - * Issues a orangefs getattr request and fills in the appropriate inode - * attributes if successful. When check is 0, returns 0 on success and -errno - * otherwise. When check is 1, returns 1 on success where the inode is valid - * and 0 on success where the inode is stale and -errno otherwise. - */ -int orangefs_inode_old_getattr(struct inode *inode, __u32 getattr_mask, - int check) -{ - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); - struct orangefs_kernel_op_s *new_op; - int ret = -EINVAL; - - gossip_debug(GOSSIP_UTILS_DEBUG, - "%s: called on inode %pU\n", - __func__, - get_khandle_from_ino(inode)); - - new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); - if (!new_op) - return -ENOMEM; - new_op->upcall.req.getattr.refn = orangefs_inode->refn; - new_op->upcall.req.getattr.mask = getattr_mask; - - ret = service_operation(new_op, __func__, - get_interruptible_flag(inode)); - if (ret != 0) - goto out; - - if (check) { - ret = compare_attributes_to_inode(inode, - &new_op->downcall.resp.getattr.attributes, - new_op->downcall.resp.getattr.link_target, - getattr_mask); - - if (new_op->downcall.resp.getattr.attributes.objtype == - ORANGEFS_TYPE_METAFILE) { - if (orangefs_inode->blksize != - new_op->downcall.resp.getattr.attributes.blksize) - ret = 0; - } else { - if (orangefs_inode->blksize != 1 << inode->i_blkbits) - ret = 0; - } - } else { - if 
(copy_attributes_to_inode(inode, - &new_op->downcall.resp.getattr.attributes, - new_op->downcall.resp.getattr.link_target)) { - gossip_err("%s: failed to copy attributes\n", __func__); - ret = -ENOENT; - goto out; - } - - /* - * Store blksize in orangefs specific part of inode structure; - * we are only going to use this to report to stat to make sure - * it doesn't perturb any inode related code paths. - */ - if (new_op->downcall.resp.getattr.attributes.objtype == - ORANGEFS_TYPE_METAFILE) { - orangefs_inode->blksize = new_op->downcall.resp. - getattr.attributes.blksize; - } else { - /* - * mimic behavior of generic_fillattr() for other file - * types. - */ - orangefs_inode->blksize = (1 << inode->i_blkbits); - - } - } - -out: - gossip_debug(GOSSIP_UTILS_DEBUG, - "Getattr on handle %pU, " - "fsid %d\n (inode ct = %d) returned %d\n", - &orangefs_inode->refn.khandle, - orangefs_inode->refn.fs_id, - (int)atomic_read(&inode->i_count), - ret); - - op_release(new_op); - return ret; -} - static int orangefs_inode_type(enum orangefs_ds_type objtype) { if (objtype == ORANGEFS_TYPE_METAFILE) @@ -655,6 +334,52 @@ out: return ret; } +int orangefs_inode_check_changed(struct inode *inode) +{ + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + struct orangefs_kernel_op_s *new_op; + int ret; + + gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, + get_khandle_from_ino(inode)); + + new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); + if (!new_op) + return -ENOMEM; + new_op->upcall.req.getattr.refn = orangefs_inode->refn; + new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE | + ORANGEFS_ATTR_SYS_LNK_TARGET; + + ret = service_operation(new_op, __func__, + get_interruptible_flag(inode)); + if (ret != 0) + goto out; + + ret = orangefs_inode_type(new_op-> + downcall.resp.getattr.attributes.objtype); + /* + * If the inode type or symlink target have changed then this + * inode is stale. 
+ */ + if (ret == -1 || !(inode->i_mode & ret)) { + orangefs_make_bad_inode(inode); + ret = 1; + goto out; + } + if (ret == S_IFLNK && strncmp(orangefs_inode->link_target, + new_op->downcall.resp.getattr.link_target, + ORANGEFS_NAME_MAX)) { + orangefs_make_bad_inode(inode); + ret = 1; + goto out; + } + + ret = 0; +out: + op_release(new_op); + return ret; +} + /* * issues a orangefs setattr request to make sure the new attribute values * take effect if successful. returns 0 on success; -errno otherwise From 266626339b688e650f4197fb8e54532581e18cae Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 17 Mar 2016 16:01:52 -0400 Subject: [PATCH 162/174] orangefs: refactor inode type or link_target change detection Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-utils.c | 77 +++++++++++++++++------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 6643a6a87fa1..36ee30d1f0ad 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -228,12 +228,35 @@ static int orangefs_inode_type(enum orangefs_ds_type objtype) return -1; } +static int orangefs_inode_is_stale(struct inode *inode, int new, + struct ORANGEFS_sys_attr_s *attrs, char *link_target) +{ + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + int type = orangefs_inode_type(attrs->objtype); + if (!new) { + /* + * If the inode type or symlink target have changed then this + * inode is stale. 
+ */ + if (type == -1 || !(inode->i_mode & type)) { + orangefs_make_bad_inode(inode); + return 1; + } + if (type == S_IFLNK && strncmp(orangefs_inode->link_target, + link_target, ORANGEFS_NAME_MAX)) { + orangefs_make_bad_inode(inode); + return 1; + } + } + return 0; +} + int orangefs_inode_getattr(struct inode *inode, int new, int size) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; loff_t inode_size, rounded_up_size; - int ret; + int ret, type; gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, get_khandle_from_ino(inode)); @@ -250,28 +273,17 @@ int orangefs_inode_getattr(struct inode *inode, int new, int size) if (ret != 0) goto out; - ret = orangefs_inode_type(new_op-> + type = orangefs_inode_type(new_op-> downcall.resp.getattr.attributes.objtype); - if (!new) { - /* - * If the inode type or symlink target have changed then this - * inode is stale. - */ - if (ret == -1 || !(inode->i_mode & ret)) { - orangefs_make_bad_inode(inode); - ret = -ESTALE; - goto out; - } - if (ret == S_IFLNK && strncmp(orangefs_inode->link_target, - new_op->downcall.resp.getattr.link_target, - ORANGEFS_NAME_MAX)) { - orangefs_make_bad_inode(inode); - ret = -ESTALE; - goto out; - } + ret = orangefs_inode_is_stale(inode, new, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target); + if (ret) { + ret = -ESTALE; + goto out; } - switch (ret) { + switch (type) { case S_IFREG: inode->i_flags = orangefs_inode_flags(&new_op-> downcall.resp.getattr.attributes); @@ -325,7 +337,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int size) inode->i_ctime.tv_nsec = 0; /* special case: mark the root inode as sticky */ - inode->i_mode = ret | (is_root_handle(inode) ? S_ISVTX : 0) | + inode->i_mode = type | (is_root_handle(inode) ? 
S_ISVTX : 0) | orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes); ret = 0; @@ -355,26 +367,9 @@ int orangefs_inode_check_changed(struct inode *inode) if (ret != 0) goto out; - ret = orangefs_inode_type(new_op-> - downcall.resp.getattr.attributes.objtype); - /* - * If the inode type or symlink target have changed then this - * inode is stale. - */ - if (ret == -1 || !(inode->i_mode & ret)) { - orangefs_make_bad_inode(inode); - ret = 1; - goto out; - } - if (ret == S_IFLNK && strncmp(orangefs_inode->link_target, - new_op->downcall.resp.getattr.link_target, - ORANGEFS_NAME_MAX)) { - orangefs_make_bad_inode(inode); - ret = 1; - goto out; - } - - ret = 0; + ret = orangefs_inode_is_stale(inode, 0, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target); out: op_release(new_op); return ret; From 93d53a488571fb8f8ceaba09352dcf4dfa1fc4e0 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 17 Mar 2016 16:33:08 -0400 Subject: [PATCH 163/174] orangefs: remove wrapper around mutex_lock(&inode->i_mutex) Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 5e85b199dee2..d85776b15176 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -645,18 +645,14 @@ do { \ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) -#define orangefs_inode_lock(__i) mutex_lock(&(__i)->i_mutex) - -#define orangefs_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex) - static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - orangefs_inode_lock(inode); + mutex_lock(&inode->i_mutex); #endif i_size_write(inode, i_size); #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - orangefs_inode_unlock(inode); + mutex_unlock(&inode->i_mutex); #endif } From 
05d31c5cb34cbdf05f9326b276be03756abb4b70 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Fri, 18 Mar 2016 13:36:45 -0400 Subject: [PATCH 164/174] orangefs: remove needless wrapper around GFP_KERNEL Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-kernel.h | 3 --- fs/orangefs/super.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index d85776b15176..5832168106de 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -120,9 +120,6 @@ struct client_debug_mask { #define ORANGEFS_CACHE_CREATE_FLAGS 0 #endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ -#define ORANGEFS_GFP_FLAGS (GFP_KERNEL) -#define ORANGEFS_BUFMAP_GFP_FLAGS (GFP_KERNEL) - /* orangefs xattr and acl related defines */ #define ORANGEFS_XATTR_INDEX_POSIX_ACL_ACCESS 1 #define ORANGEFS_XATTR_INDEX_POSIX_ACL_DEFAULT 2 diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index eac24eb7fe80..1eeb0093b62c 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -360,8 +360,7 @@ static int orangefs_fill_sb(struct super_block *sb, struct orangefs_object_kref root_object; /* alloc and init our private orangefs sb info */ - sb->s_fs_info = - kzalloc(sizeof(struct orangefs_sb_info_s), ORANGEFS_GFP_FLAGS); + sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); if (!ORANGEFS_SB(sb)) return -ENOMEM; ORANGEFS_SB(sb)->sb = sb; From e8da254c415475d3df67966a198523bfe3ac0576 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Fri, 18 Mar 2016 14:20:15 -0400 Subject: [PATCH 165/174] orangefs: move code which sets i_link to orangefs_inode_getattr Everything else setting inode->i_ values is in there. 
Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 2 -- fs/orangefs/orangefs-utils.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 8f047722cb44..2e521ec734c4 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -275,8 +275,6 @@ int orangefs_getattr(struct vfsmount *mnt, /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); kstat->blksize = orangefs_inode->blksize; - - inode->i_link = ORANGEFS_I(dentry->d_inode)->link_target; } return ret; } diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 36ee30d1f0ad..40f5163b56aa 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -318,6 +318,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int size) strlcpy(orangefs_inode->link_target, new_op->downcall.resp.getattr.link_target, ORANGEFS_NAME_MAX); + inode->i_link = orangefs_inode->link_target; } break; } From fecd86aac5a7621635b61e7491f0ed73610d76fa Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Wed, 23 Mar 2016 17:06:25 -0400 Subject: [PATCH 166/174] orangefs: ensure that truncate has an up to date inode size Signed-off-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 2e521ec734c4..2382e267b49e 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -157,7 +157,7 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; - loff_t orig_size = i_size_read(inode); + loff_t orig_size; int ret = -EINVAL; gossip_debug(GOSSIP_INODE_DEBUG, @@ -168,6 +168,17 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) orangefs_inode->refn.fs_id, iattr->ia_size); + /* Ensure that
we have a up to date size, so we know if it changed. */ + ret = orangefs_inode_getattr(inode, 0, 1); + if (ret == -ESTALE) + ret = -EIO; + if (ret) { + gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", + __func__, ret); + return ret; + } + orig_size = i_size_read(inode); + truncate_setsize(inode, iattr->ia_size); new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE); From 9f5e2f7f1b4bf7d0b19d88edd9425510fadbb9e4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2016 19:54:13 -0500 Subject: [PATCH 167/174] orangefs: get rid of readdir_handle_s no point, really - we couldn't keep those across the calls of getdents(); it would be too easy to DoS, having all slots exhausted. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 93 +++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 63 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 259b667f6c8f..53a411732606 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -8,11 +8,6 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -struct readdir_handle_s { - struct orangefs_readdir_response_s readdir_response; - void *dents_buf; -}; - /* * decode routine used by kmod to deal with the blob sent from * userspace for readdirs. 
The blob contains zero or more of these @@ -141,44 +136,6 @@ out: return ret; } -static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, - size_t size) -{ - long ret; - - if (buf == NULL) { - gossip_err - ("Invalid NULL buffer specified in readdir_handle_ctor\n"); - return -ENOMEM; - } - rhandle->dents_buf = buf; - ret = decode_dirents(buf, size, &rhandle->readdir_response); - if (ret < 0) { - gossip_err("Could not decode readdir from buffer %ld\n", ret); - gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); - vfree(buf); - rhandle->dents_buf = NULL; - } - return ret; -} - -static void readdir_handle_dtor(struct readdir_handle_s *rhandle) -{ - if (rhandle == NULL) - return; - - /* kfree(NULL) is safe */ - kfree(rhandle->readdir_response.dirent_array); - rhandle->readdir_response.dirent_array = NULL; - - if (rhandle->dents_buf) { - gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", - rhandle->dents_buf); - vfree(rhandle->dents_buf); - rhandle->dents_buf = NULL; - } -} - /* * Read directory entries from an instance of an open directory. */ @@ -198,7 +155,8 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) struct orangefs_kernel_op_s *new_op = NULL; struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); int buffer_full = 0; - struct readdir_handle_s rhandle; + struct orangefs_readdir_response_s readdir_response; + void *dents_buf; int i = 0; int len = 0; ino_t current_ino = 0; @@ -224,8 +182,7 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) "orangefs_readdir called on %s (pos=%llu)\n", dentry->d_name.name, llu(pos)); - rhandle.dents_buf = NULL; - memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); + memset(&readdir_response, 0, sizeof(readdir_response)); new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); if (!new_op) @@ -278,7 +235,7 @@ get_new_buffer_index: if (ret == -EIO && op_state_purged(new_op)) { gossip_err("%s: Client is down. 
Aborting readdir call.\n", __func__); - goto out_free_op; + goto out_slot; } if (ret < 0 || new_op->downcall.status != 0) { @@ -287,18 +244,22 @@ get_new_buffer_index: new_op->downcall.status); if (ret >= 0) ret = new_op->downcall.status; - goto out_free_op; + goto out_slot; } - bytes_decoded = - readdir_handle_ctor(&rhandle, - new_op->downcall.trailer_buf, - new_op->downcall.trailer_size); + dents_buf = new_op->downcall.trailer_buf; + if (dents_buf == NULL) { + gossip_err("Invalid NULL buffer in readdir response\n"); + ret = -ENOMEM; + goto out_slot; + } + + bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size, + &readdir_response); if (bytes_decoded < 0) { - gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", - ret); ret = bytes_decoded; - goto out_free_op; + gossip_err("Could not decode readdir from buffer %d\n", ret); + goto out_vfree; } if (bytes_decoded != new_op->downcall.trailer_size) { @@ -345,14 +306,14 @@ get_new_buffer_index: gossip_debug(GOSSIP_DIR_DEBUG, "%s: dirent_outcount:%d:\n", __func__, - rhandle.readdir_response.orangefs_dirent_outcount); + readdir_response.orangefs_dirent_outcount); for (i = ctx->pos; - i < rhandle.readdir_response.orangefs_dirent_outcount; + i < readdir_response.orangefs_dirent_outcount; i++) { - len = rhandle.readdir_response.dirent_array[i].d_length; - current_entry = rhandle.readdir_response.dirent_array[i].d_name; + len = readdir_response.dirent_array[i].d_length; + current_entry = readdir_response.dirent_array[i].d_name; current_ino = orangefs_khandle_to_ino( - &(rhandle.readdir_response.dirent_array[i].khandle)); + &readdir_response.dirent_array[i].khandle); gossip_debug(GOSSIP_DIR_DEBUG, "calling dir_emit for %s with len %d" @@ -382,14 +343,14 @@ get_new_buffer_index: * getting another batch... 
*/ if (ret) { - *ptoken = rhandle.readdir_response.token; + *ptoken = readdir_response.token; ctx->pos = ORANGEFS_ITERATE_NEXT; } /* * Did we hit the end of the directory? */ - if (rhandle.readdir_response.token == ORANGEFS_READDIR_END && + if (readdir_response.token == ORANGEFS_READDIR_END && !buffer_full) { gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); @@ -397,7 +358,13 @@ get_new_buffer_index: } out_destroy_handle: - readdir_handle_dtor(&rhandle); + /* kfree(NULL) is safe */ + kfree(readdir_response.dirent_array); +out_vfree: + gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf); + vfree(dents_buf); +out_slot: + orangefs_readdir_index_put(buffer_index); out_free_op: op_release(new_op); gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); From bf6bf606e545cb31c29499b354c13b2621acd649 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2016 20:06:19 -0500 Subject: [PATCH 168/174] orangefs_copy_{to,from}_bufmap(): don't pass bufmap pointer it's always __orangefs_bufmap Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 18 ++++++------------ fs/orangefs/orangefs-bufmap.c | 13 ++++++------- fs/orangefs/orangefs-bufmap.h | 6 ++---- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index d4a00ad26f6e..db9dd6ebcc3f 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -21,8 +21,7 @@ * can futher be kernel-space or user-space addresses. 
* or it can pointers to struct page's */ -static int precopy_buffers(struct orangefs_bufmap *bufmap, - int buffer_index, +static int precopy_buffers(int buffer_index, struct iov_iter *iter, size_t total_size) { @@ -34,8 +33,7 @@ static int precopy_buffers(struct orangefs_bufmap *bufmap, if (total_size) { - ret = orangefs_bufmap_copy_from_iovec(bufmap, - iter, + ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index, total_size); if (ret < 0) @@ -58,8 +56,7 @@ static int precopy_buffers(struct orangefs_bufmap *bufmap, * can futher be kernel-space or user-space addresses. * or it can pointers to struct page's */ -static int postcopy_buffers(struct orangefs_bufmap *bufmap, - int buffer_index, +static int postcopy_buffers(int buffer_index, struct iov_iter *iter, size_t total_size) { @@ -70,8 +67,7 @@ static int postcopy_buffers(struct orangefs_bufmap *bufmap, * struct page pointers. */ if (total_size) { - ret = orangefs_bufmap_copy_to_iovec(bufmap, - iter, + ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, total_size); if (ret < 0) @@ -138,8 +134,7 @@ populate_shared_memory: * precopy_buffers only pertains to writes. */ if (type == ORANGEFS_IO_WRITE) { - ret = precopy_buffers(bufmap, - buffer_index, + ret = precopy_buffers(buffer_index, iter, total_size); if (ret < 0) @@ -242,8 +237,7 @@ populate_shared_memory: * postcopy_buffers only pertains to reads. */ if (type == ORANGEFS_IO_READ) { - ret = postcopy_buffers(bufmap, - buffer_index, + ret = postcopy_buffers(buffer_index, iter, new_op->downcall.resp.io.amt_complete); if (ret < 0) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 44d437dbfce0..97689c6cab17 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -511,19 +511,18 @@ void orangefs_readdir_index_put(int buffer_index) * we've been handed an iovec, we need to copy it to * the shared memory descriptor at "buffer_index". 
*/ -int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, - struct iov_iter *iter, +int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter, int buffer_index, size_t size) { - struct orangefs_bufmap_desc *to = &bufmap->desc_array[buffer_index]; + struct orangefs_bufmap_desc *to; int i; gossip_debug(GOSSIP_BUFMAP_DEBUG, "%s: buffer_index:%d: size:%zu:\n", __func__, buffer_index, size); - + to = &__orangefs_bufmap->desc_array[buffer_index]; for (i = 0; size; i++) { struct page *page = to->page_array[i]; size_t n = size; @@ -542,14 +541,14 @@ int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, * we've been handed an iovec, we need to fill it from * the shared memory descriptor at "buffer_index". */ -int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap, - struct iov_iter *iter, +int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter, int buffer_index, size_t size) { - struct orangefs_bufmap_desc *from = &bufmap->desc_array[buffer_index]; + struct orangefs_bufmap_desc *from; int i; + from = &__orangefs_bufmap->desc_array[buffer_index]; gossip_debug(GOSSIP_BUFMAP_DEBUG, "%s: buffer_index:%d: size:%zu:\n", __func__, buffer_index, size); diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index 0be62be373f7..babdc713c5e0 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -27,13 +27,11 @@ int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) void orangefs_readdir_index_put(int buffer_index); -int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap, - struct iov_iter *iter, +int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter, int buffer_index, size_t size); -int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap, - struct iov_iter *iter, +int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter, int buffer_index, size_t size); From b8a99a8f9f0aebf2a75bb0d9280bff7e7ac9b57e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 
Feb 2016 20:10:26 -0500 Subject: [PATCH 169/174] orangefs: saner calling conventions for getting a slot just have it return the slot number or -E... - the caller checks the sign anyway Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/dir.c | 6 +++--- fs/orangefs/file.c | 10 +++++----- fs/orangefs/orangefs-bufmap.c | 24 ++++++------------------ fs/orangefs/orangefs-bufmap.h | 4 ++-- 4 files changed, 16 insertions(+), 28 deletions(-) diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 53a411732606..f30b6ecacdd1 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -141,7 +141,6 @@ out: */ static int orangefs_readdir(struct file *file, struct dir_context *ctx) { - struct orangefs_bufmap *bufmap = NULL; int ret = 0; int buffer_index; /* @@ -205,8 +204,9 @@ static int orangefs_readdir(struct file *file, struct dir_context *ctx) new_op->upcall.req.readdir.token = *ptoken; get_new_buffer_index: - ret = orangefs_readdir_index_get(&bufmap, &buffer_index); - if (ret < 0) { + buffer_index = orangefs_readdir_index_get(); + if (buffer_index < 0) { + ret = buffer_index; gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", ret); goto out_free_op; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index db9dd6ebcc3f..63e6a10ab13d 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -87,7 +87,6 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; - struct orangefs_bufmap *bufmap = NULL; struct orangefs_kernel_op_s *new_op = NULL; struct iov_iter saved = *iter; int buffer_index = -1; @@ -104,11 +103,12 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod populate_shared_memory: /* get a shared buffer index */ - ret = orangefs_bufmap_get(&bufmap, &buffer_index); - if (ret < 0) { + buffer_index = orangefs_bufmap_get(); + if (buffer_index 
< 0) { + ret = buffer_index; gossip_debug(GOSSIP_FILE_DEBUG, - "%s: orangefs_bufmap_get failure (%ld)\n", - __func__, (long)ret); + "%s: orangefs_bufmap_get failure (%zd)\n", + __func__, ret); goto out; } gossip_debug(GOSSIP_FILE_DEBUG, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 97689c6cab17..1f8acc9f9a88 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -455,17 +455,11 @@ void orangefs_bufmap_run_down(void) * gets a free mapped buffer descriptor, will sleep until one becomes * available if necessary * - * returns 0 on success, -errno on failure + * returns slot on success, -errno on failure */ -int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index) +int orangefs_bufmap_get(void) { - int ret = get(&rw_map); - if (ret >= 0) { - *mapp = __orangefs_bufmap; - *buffer_index = ret; - ret = 0; - } - return ret; + return get(&rw_map); } /* @@ -489,17 +483,11 @@ void orangefs_bufmap_put(int buffer_index) * we could do that at a later point of time. Regardless, these * indices are used by the client-core. 
* - * returns 0 on success, -errno on failure + * returns slot on success, -errno on failure */ -int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index) +int orangefs_readdir_index_get(void) { - int ret = get(&readdir_map); - if (ret >= 0) { - *mapp = __orangefs_bufmap; - *buffer_index = ret; - ret = 0; - } - return ret; + return get(&readdir_map); } void orangefs_readdir_index_put(int buffer_index) diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index babdc713c5e0..ec2849cb52e2 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -19,11 +19,11 @@ void orangefs_bufmap_finalize(void); void orangefs_bufmap_run_down(void); -int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index); +int orangefs_bufmap_get(void); void orangefs_bufmap_put(int buffer_index); -int orangefs_readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index); +int orangefs_readdir_index_get(void); void orangefs_readdir_index_put(int buffer_index); From 7df240d771862c31f869d6b9024c1942c1f01521 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2016 20:12:04 -0500 Subject: [PATCH 170/174] orangefs-bufmap.h: trim unused junk Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-bufmap.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h index ec2849cb52e2..71f64f4057b5 100644 --- a/fs/orangefs/orangefs-bufmap.h +++ b/fs/orangefs/orangefs-bufmap.h @@ -7,8 +7,6 @@ #ifndef __ORANGEFS_BUFMAP_H #define __ORANGEFS_BUFMAP_H -struct orangefs_bufmap; - int orangefs_bufmap_size_query(void); int orangefs_bufmap_shift_query(void); @@ -35,11 +33,4 @@ int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter, int buffer_index, size_t size); -size_t orangefs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, - struct iovec *iovec, - unsigned long nr_segs, - struct orangefs_bufmap *bufmap, - int buffer_index, - 
size_t bytes_to_be_copied); - #endif /* __ORANGEFS_BUFMAP_H */ From 177f8fc491e230c2e7a3ac7d5626dd6f3d94e9f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2016 20:25:19 -0500 Subject: [PATCH 171/174] orangefs: sanitize ->llseek() a) open files can't have NULL inodes b) it's SEEK_END, not ORANGEFS_SEEK_END; no need to get cute. c) make_bad_inode() on lseek()? Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 12 +++--------- fs/orangefs/orangefs-kernel.h | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 63e6a10ab13d..cb6a164b2718 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -652,14 +652,9 @@ static int orangefs_fsync(struct file *file, static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) { int ret = -EINVAL; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); - if (!inode) { - gossip_err("orangefs_file_llseek: invalid inode (NULL)\n"); - return ret; - } - - if (origin == ORANGEFS_SEEK_END) { + if (origin == SEEK_END) { /* * revalidate the inode's file size. 
* NOTE: We are only interested in file size here, @@ -674,7 +669,6 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) __FILE__, __func__, __LINE__); - orangefs_make_bad_inode(inode); return ret; } } @@ -684,7 +678,7 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) " | inode size is %lu\n", (long)offset, origin, - (unsigned long)file->f_path.dentry->d_inode->i_size); + (unsigned long)i_size_read(inode)); return generic_file_llseek(file, offset, origin); } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 5832168106de..db258d2ccc6a 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -70,7 +70,6 @@ #define ORANGEFS_DEVREQ_MAGIC 0x20030529 #define ORANGEFS_LINK_MAX 0x000000FF #define ORANGEFS_PURGE_RETRY_COUNT 0x00000005 -#define ORANGEFS_SEEK_END 0x00000002 #define ORANGEFS_MAX_NUM_OPTIONS 0x00000004 #define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080 #define ORANGEFS_MAX_FSKEY_LEN 64 From 524b1d3095159adeee0305508eefc836a197b681 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2016 21:08:29 -0500 Subject: [PATCH 172/174] orangefs: have ->kill_sb() evict the VFS side of things first Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 1eeb0093b62c..5a89b8083966 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -505,6 +505,9 @@ void orangefs_kill_sb(struct super_block *sb) { gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n"); + /* provided sb cleanup */ + kill_anon_super(sb); + /* * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock @@ -514,9 +517,6 @@ void orangefs_kill_sb(struct super_block *sb) /* remove the sb from our list of orangefs specific sb's */ remove_orangefs_sb(sb); - /* provided sb cleanup */ - 
kill_anon_super(sb); - /* free the orangefs superblock private data */ kfree(ORANGEFS_SB(sb)); } From 6d4c1a30b32a377083900f39c42bcacb633f99a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2016 20:15:43 -0500 Subject: [PATCH 173/174] orangefs: fix do_readv_writev() handling of error halfway through Error should only be returned if nothing had been read/written. Otherwise we need to report a short read/write instead. Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index cb6a164b2718..ae92795ed965 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -350,9 +350,9 @@ static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, break; } /*end while */ +out: if (total_count > 0) ret = total_count; -out: if (ret > 0) { if (type == ORANGEFS_IO_READ) { file_accessed(file); From 45996492e5c85aa0ac93a95d1b2d1ed56851c865 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 19:56:34 -0400 Subject: [PATCH 174/174] orangefs: fix orangefs_superblock locking * switch orangefs_remount() to taking ORANGEFS_SB(sb) instead of sb * remove from the list _before_ orangefs_unmount() - request_mutex in the latter will make sure that nothing observed in the loop in ORANGEFS_DEV_REMOUNT_ALL handling will get freed until the end of loop * on removal, keep the forward pointer and zero the back one. That way we can drop and regain the spinlock in the loop body (again, ORANGEFS_DEV_REMOUNT_ALL one) and still be able to get to the rest of the list. 
Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 43 +++++++++++++++++++---------------- fs/orangefs/orangefs-kernel.h | 34 +-------------------------- fs/orangefs/super.c | 30 ++++++++++++++++++------ 3 files changed, 48 insertions(+), 59 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 35418d0b77bf..db170beba797 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -572,8 +572,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) struct dev_mask_info_s mask_info = { 0 }; struct dev_mask2_info_s mask2_info = { 0, 0 }; int upstream_kmod = 1; - struct list_head *tmp = NULL; - struct orangefs_sb_info_s *orangefs_sb = NULL; + struct orangefs_sb_info_s *orangefs_sb; /* mtmoore: add locking here */ @@ -619,26 +618,32 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) gossip_debug(GOSSIP_DEV_DEBUG, "%s: priority remount in progress\n", __func__); - list_for_each(tmp, &orangefs_superblocks) { - orangefs_sb = - list_entry(tmp, - struct orangefs_sb_info_s, - list); - if (orangefs_sb && (orangefs_sb->sb)) { - gossip_debug(GOSSIP_DEV_DEBUG, - "%s: Remounting SB %p\n", - __func__, - orangefs_sb); + spin_lock(&orangefs_superblocks_lock); + list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) { + /* + * We have to drop the spinlock, so entries can be + * removed. They can't be freed, though, so we just + * keep the forward pointers and zero the back ones - + * that way we can get to the rest of the list. 
+ */ + if (!orangefs_sb->list.prev) + continue; + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: Remounting SB %p\n", + __func__, + orangefs_sb); - ret = orangefs_remount(orangefs_sb->sb); - if (ret) { - gossip_debug(GOSSIP_DEV_DEBUG, - "SB %p remount failed\n", - orangefs_sb); - break; - } + spin_unlock(&orangefs_superblocks_lock); + ret = orangefs_remount(orangefs_sb); + spin_lock(&orangefs_superblocks_lock); + if (ret) { + gossip_debug(GOSSIP_DEV_DEBUG, + "SB %p remount failed\n", + orangefs_sb); + break; } } + spin_unlock(&orangefs_superblocks_lock); gossip_debug(GOSSIP_DEV_DEBUG, "%s: priority remount complete\n", __func__); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index db258d2ccc6a..a9925e296ceb 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -462,7 +462,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, void *data); void orangefs_kill_sb(struct super_block *sb); -int orangefs_remount(struct super_block *sb); +int orangefs_remount(struct orangefs_sb_info_s *); int fsid_key_table_initialize(void); void fsid_key_table_finalize(void); @@ -598,38 +598,6 @@ int service_operation(struct orangefs_kernel_op_s *op, ((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? 
\ ORANGEFS_OP_INTERRUPTIBLE : 0) -#define add_orangefs_sb(sb) \ -do { \ - gossip_debug(GOSSIP_SUPER_DEBUG, \ - "Adding SB %p to orangefs superblocks\n", \ - ORANGEFS_SB(sb)); \ - spin_lock(&orangefs_superblocks_lock); \ - list_add_tail(&ORANGEFS_SB(sb)->list, &orangefs_superblocks); \ - spin_unlock(&orangefs_superblocks_lock); \ -} while (0) - -#define remove_orangefs_sb(sb) \ -do { \ - struct list_head *tmp = NULL; \ - struct list_head *tmp_safe = NULL; \ - struct orangefs_sb_info_s *orangefs_sb = NULL; \ - \ - spin_lock(&orangefs_superblocks_lock); \ - list_for_each_safe(tmp, tmp_safe, &orangefs_superblocks) { \ - orangefs_sb = list_entry(tmp, \ - struct orangefs_sb_info_s, \ - list); \ - if (orangefs_sb && (orangefs_sb->sb == sb)) { \ - gossip_debug(GOSSIP_SUPER_DEBUG, \ - "Removing SB %p from orangefs superblocks\n", \ - orangefs_sb); \ - list_del(&orangefs_sb->list); \ - break; \ - } \ - } \ - spin_unlock(&orangefs_superblocks_lock); \ -} while (0) - #define fill_default_sys_attrs(sys_attr, type, mode) \ do { \ sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \ diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 5a89b8083966..b9da9a0281c9 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -210,7 +210,7 @@ static int orangefs_remount_fs(struct super_block *sb, int *flags, char *data) * the client regains all of the mount information from us. * NOTE: this function assumes that the request_mutex is already acquired! 
*/ -int orangefs_remount(struct super_block *sb) +int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb) { struct orangefs_kernel_op_s *new_op; int ret = -EINVAL; @@ -221,7 +221,7 @@ int orangefs_remount(struct super_block *sb) if (!new_op) return -ENOMEM; strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, - ORANGEFS_SB(sb)->devname, + orangefs_sb->devname, ORANGEFS_MAX_SERVER_ADDR_LEN); gossip_debug(GOSSIP_SUPER_DEBUG, @@ -244,8 +244,8 @@ int orangefs_remount(struct super_block *sb) * short-lived mapping that the system interface uses * to map this superblock to a particular mount entry */ - ORANGEFS_SB(sb)->id = new_op->downcall.resp.fs_mount.id; - ORANGEFS_SB(sb)->mount_pending = 0; + orangefs_sb->id = new_op->downcall.resp.fs_mount.id; + orangefs_sb->mount_pending = 0; } op_release(new_op); @@ -485,7 +485,12 @@ struct dentry *orangefs_mount(struct file_system_type *fst, * finally, add this sb to our list of known orangefs * sb's */ - add_orangefs_sb(sb); + gossip_debug(GOSSIP_SUPER_DEBUG, + "Adding SB %p to orangefs superblocks\n", + ORANGEFS_SB(sb)); + spin_lock(&orangefs_superblocks_lock); + list_add_tail(&ORANGEFS_SB(sb)->list, &orangefs_superblocks); + spin_unlock(&orangefs_superblocks_lock); op_release(new_op); return dget(sb->s_root); @@ -512,10 +517,21 @@ void orangefs_kill_sb(struct super_block *sb) * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock */ - orangefs_unmount_sb(sb); + orangefs_unmount_sb(sb); /* remove the sb from our list of orangefs specific sb's */ - remove_orangefs_sb(sb); + + spin_lock(&orangefs_superblocks_lock); + __list_del_entry(&ORANGEFS_SB(sb)->list); /* not list_del_init */ + ORANGEFS_SB(sb)->list.prev = NULL; + spin_unlock(&orangefs_superblocks_lock); + + /* + * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us + * gets completed before we free the dang thing. 
+ */ + mutex_lock(&request_mutex); + mutex_unlock(&request_mutex); /* free the orangefs superblock private data */ kfree(ORANGEFS_SB(sb));