diff --git a/Documentation/filesystems/ext4/inodes.rst b/Documentation/filesystems/ext4/inodes.rst index 6bd35e506b6f..e851e6ca31fa 100644 --- a/Documentation/filesystems/ext4/inodes.rst +++ b/Documentation/filesystems/ext4/inodes.rst @@ -277,6 +277,8 @@ The ``i_flags`` field is a combination of these values: - This is a huge file (EXT4\_HUGE\_FILE\_FL). * - 0x80000 - Inode uses extents (EXT4\_EXTENTS\_FL). + * - 0x100000 + - Verity protected file (EXT4\_VERITY\_FL). * - 0x200000 - Inode stores a large extended attribute value in its data blocks (EXT4\_EA\_INODE\_FL). @@ -299,9 +301,9 @@ The ``i_flags`` field is a combination of these values: - Reserved for ext4 library (EXT4\_RESERVED\_FL). * - - Aggregate flags: - * - 0x4BDFFF + * - 0x705BDFFF - User-visible flags. - * - 0x4B80FF + * - 0x604BC0FF - User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of diff --git a/Documentation/filesystems/ext4/overview.rst b/Documentation/filesystems/ext4/overview.rst index cbab18baba12..123ebfde47ee 100644 --- a/Documentation/filesystems/ext4/overview.rst +++ b/Documentation/filesystems/ext4/overview.rst @@ -24,3 +24,4 @@ order. .. include:: bigalloc.rst .. include:: inlinedata.rst .. include:: eainode.rst +.. include:: verity.rst diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index 04ff079a2acf..6eae92054827 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -696,6 +696,8 @@ the following: (RO\_COMPAT\_READONLY) * - 0x2000 - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT) + * - 0x8000 + - Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY) .. _super_def_hash: diff --git a/Documentation/filesystems/ext4/verity.rst b/Documentation/filesystems/ext4/verity.rst new file mode 100644 index 000000000000..3e4c0ee0e068 --- /dev/null +++ b/Documentation/filesystems/ext4/verity.rst @@ -0,0 +1,41 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Verity files +------------ + +ext4 supports fs-verity, which is a filesystem feature that provides +Merkle tree based hashing for individual readonly files. Most of +fs-verity is common to all filesystems that support it; see +:ref:`Documentation/filesystems/fsverity.rst ` for the +fs-verity documentation. However, the on-disk layout of the verity +metadata is filesystem-specific. On ext4, the verity metadata is +stored after the end of the file data itself, in the following format: + +- Zero-padding to the next 65536-byte boundary. This padding need not + actually be allocated on-disk, i.e. it may be a hole. + +- The Merkle tree, as documented in + :ref:`Documentation/filesystems/fsverity.rst + `, with the tree levels stored in order from + root to leaf, and the tree blocks within each level stored in their + natural order. + +- Zero-padding to the next filesystem block boundary. + +- The verity descriptor, as documented in + :ref:`Documentation/filesystems/fsverity.rst `, + with optionally appended signature blob. + +- Zero-padding to the next offset that is 4 bytes before a filesystem + block boundary. + +- The size of the verity descriptor in bytes, as a 4-byte little + endian integer. + +Verity inodes have EXT4_VERITY_FL set, and they must use extents, i.e. +EXT4_EXTENTS_FL must be set and EXT4_INLINE_DATA_FL must be clear. +They can have EXT4_ENCRYPT_FL set, in which case the verity metadata +is encrypted as well as the data itself. + +Verity files cannot have blocks allocated past the end of the verity +metadata. diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst new file mode 100644 index 000000000000..42a0b6dd9e0b --- /dev/null +++ b/Documentation/filesystems/fsverity.rst @@ -0,0 +1,726 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _fsverity: + +======================================================= +fs-verity: read-only file-based authenticity protection +======================================================= + +Introduction +============ + +fs-verity (``fs/verity/``) is a support layer that filesystems can +hook into to support transparent integrity and authenticity protection +of read-only files. Currently, it is supported by the ext4 and f2fs +filesystems. Like fscrypt, not too much filesystem-specific code is +needed to support fs-verity. + +fs-verity is similar to `dm-verity +`_ +but works on files rather than block devices. On regular files on +filesystems supporting fs-verity, userspace can execute an ioctl that +causes the filesystem to build a Merkle tree for the file and persist +it to a filesystem-specific location associated with the file. + +After this, the file is made readonly, and all reads from the file are +automatically verified against the file's Merkle tree. Reads of any +corrupted data, including mmap reads, will fail. + +Userspace can use another ioctl to retrieve the root hash (actually +the "file measurement", which is a hash that includes the root hash) +that fs-verity is enforcing for the file. This ioctl executes in +constant time, regardless of the file size. + +fs-verity is essentially a way to hash a file in constant time, +subject to the caveat that reads which would violate the hash will +fail at runtime. + +Use cases +========= + +By itself, the base fs-verity feature only provides integrity +protection, i.e. detection of accidental (non-malicious) corruption. + +However, because fs-verity makes retrieving the file hash extremely +efficient, it's primarily meant to be used as a tool to support +authentication (detection of malicious modifications) or auditing +(logging file hashes before use). + +Trusted userspace code (e.g. operating system code running on a +read-only partition that is itself authenticated by dm-verity) can +authenticate the contents of an fs-verity file by using the +`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a +digital signature of it. + +A standard file hash could be used instead of fs-verity. However, +this is inefficient if the file is large and only a small portion may +be accessed. This is often the case for Android application package +(APK) files, for example. These typically contain many translations, +classes, and other resources that are infrequently or even never +accessed on a particular device. It would be slow and wasteful to +read and hash the entire file before starting the application. + +Unlike an ahead-of-time hash, fs-verity also re-verifies data each +time it's paged in. This ensures that malicious disk firmware can't +undetectably change the contents of the file at runtime. + +fs-verity does not replace or obsolete dm-verity. dm-verity should +still be used on read-only filesystems. fs-verity is for files that +must live on a read-write filesystem because they are independently +updated and potentially user-installed, so dm-verity cannot be used. + +The base fs-verity feature is a hashing mechanism only; actually +authenticating the files is up to userspace. However, to meet some +users' needs, fs-verity optionally supports a simple signature +verification mechanism where users can configure the kernel to require +that all fs-verity files be signed by a key loaded into a keyring; see +`Built-in signature verification`_. Support for fs-verity file hashes +in IMA (Integrity Measurement Architecture) policies is also planned. + +User API +======== + +FS_IOC_ENABLE_VERITY +-------------------- + +The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes +in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as +follows:: + + struct fsverity_enable_arg { + __u32 version; + __u32 hash_algorithm; + __u32 block_size; + __u32 salt_size; + __u64 salt_ptr; + __u32 sig_size; + __u32 __reserved1; + __u64 sig_ptr; + __u64 __reserved2[11]; + }; + +This structure contains the parameters of the Merkle tree to build for +the file, and optionally contains a signature. It must be initialized +as follows: + +- ``version`` must be 1. +- ``hash_algorithm`` must be the identifier for the hash algorithm to + use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See + ``include/uapi/linux/fsverity.h`` for the list of possible values. +- ``block_size`` must be the Merkle tree block size. Currently, this + must be equal to the system page size, which is usually 4096 bytes. + Other sizes may be supported in the future. This value is not + necessarily the same as the filesystem block size. +- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is + provided. The salt is a value that is prepended to every hashed + block; it can be used to personalize the hashing for a particular + file or device. Currently the maximum salt size is 32 bytes. +- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is + provided. +- ``sig_size`` is the size of the signature in bytes, or 0 if no + signature is provided. Currently the signature is (somewhat + arbitrarily) limited to 16128 bytes. See `Built-in signature + verification`_ for more information. +- ``sig_ptr`` is the pointer to the signature, or NULL if no + signature is provided. +- All reserved fields must be zeroed. + +FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for +the file and persist it to a filesystem-specific location associated +with the file, then mark the file as a verity file. This ioctl may +take a long time to execute on large files, and it is interruptible by +fatal signals. + +FS_IOC_ENABLE_VERITY checks for write access to the inode. However, +it must be executed on an O_RDONLY file descriptor and no processes +can have the file open for writing. Attempts to open the file for +writing while this ioctl is executing will fail with ETXTBSY. (This +is necessary to guarantee that no writable file descriptors will exist +after verity is enabled, and to guarantee that the file's contents are +stable while the Merkle tree is being built over it.) + +On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a +verity file. On failure (including the case of interruption by a +fatal signal), no changes are made to the file. + +FS_IOC_ENABLE_VERITY can fail with the following errors: + +- ``EACCES``: the process does not have write access to the file +- ``EBADMSG``: the signature is malformed +- ``EBUSY``: this ioctl is already running on the file +- ``EEXIST``: the file already has verity enabled +- ``EFAULT``: the caller provided inaccessible memory +- ``EINTR``: the operation was interrupted by a fatal signal +- ``EINVAL``: unsupported version, hash algorithm, or block size; or + reserved bits are set; or the file descriptor refers to neither a + regular file nor a directory. +- ``EISDIR``: the file descriptor refers to a directory +- ``EKEYREJECTED``: the signature doesn't match the file +- ``EMSGSIZE``: the salt or signature is too long +- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate + needed to verify the signature +- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not + available in the kernel's crypto API as currently configured (e.g. + for SHA-512, missing CONFIG_CRYPTO_SHA512). +- ``ENOTTY``: this type of filesystem does not implement fs-verity +- ``EOPNOTSUPP``: the kernel was not configured with fs-verity + support; or the filesystem superblock has not had the 'verity' + feature enabled on it; or the filesystem does not support fs-verity + on this file. (See `Filesystem support`_.) +- ``EPERM``: the file is append-only; or, a signature is required and + one was not provided. +- ``EROFS``: the filesystem is read-only +- ``ETXTBSY``: someone has the file open for writing. This can be the + caller's file descriptor, another open file descriptor, or the file + reference held by a writable memory map. + +FS_IOC_MEASURE_VERITY +--------------------- + +The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity +file. The file measurement is a digest that cryptographically +identifies the file contents that are being enforced on reads. + +This ioctl takes in a pointer to a variable-length structure:: + + struct fsverity_digest { + __u16 digest_algorithm; + __u16 digest_size; /* input/output */ + __u8 digest[]; + }; + +``digest_size`` is an input/output field. On input, it must be +initialized to the number of bytes allocated for the variable-length +``digest`` field. + +On success, 0 is returned and the kernel fills in the structure as +follows: + +- ``digest_algorithm`` will be the hash algorithm used for the file + measurement. It will match ``fsverity_enable_arg::hash_algorithm``. +- ``digest_size`` will be the size of the digest in bytes, e.g. 32 + for SHA-256. (This can be redundant with ``digest_algorithm``.) +- ``digest`` will be the actual bytes of the digest. + +FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time, +regardless of the size of the file. + +FS_IOC_MEASURE_VERITY can fail with the following errors: + +- ``EFAULT``: the caller provided inaccessible memory +- ``ENODATA``: the file is not a verity file +- ``ENOTTY``: this type of filesystem does not implement fs-verity +- ``EOPNOTSUPP``: the kernel was not configured with fs-verity + support, or the filesystem superblock has not had the 'verity' + feature enabled on it. (See `Filesystem support`_.) +- ``EOVERFLOW``: the digest is longer than the specified + ``digest_size`` bytes. Try providing a larger buffer. + +FS_IOC_GETFLAGS +--------------- + +The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity) +can also be used to check whether a file has fs-verity enabled or not. +To do so, check for FS_VERITY_FL (0x00100000) in the returned flags. + +The verity flag is not settable via FS_IOC_SETFLAGS. You must use +FS_IOC_ENABLE_VERITY instead, since parameters must be provided. + +Accessing verity files +====================== + +Applications can transparently access a verity file just like a +non-verity one, with the following exceptions: + +- Verity files are readonly. They cannot be opened for writing or + truncate()d, even if the file mode bits allow it. Attempts to do + one of these things will fail with EPERM. However, changes to + metadata such as owner, mode, timestamps, and xattrs are still + allowed, since these are not measured by fs-verity. Verity files + can also still be renamed, deleted, and linked to. + +- Direct I/O is not supported on verity files. Attempts to use direct + I/O on such files will fall back to buffered I/O. + +- DAX (Direct Access) is not supported on verity files, because this + would circumvent the data verification. + +- Reads of data that doesn't match the verity Merkle tree will fail + with EIO (for read()) or SIGBUS (for mmap() reads). + +- If the sysctl "fs.verity.require_signatures" is set to 1 and the + file's verity measurement is not signed by a key in the fs-verity + keyring, then opening the file will fail. See `Built-in signature + verification`_. + +Direct access to the Merkle tree is not supported. Therefore, if a +verity file is copied, or is backed up and restored, then it will lose +its "verity"-ness. fs-verity is primarily meant for files like +executables that are managed by a package manager. + +File measurement computation +============================ + +This section describes how fs-verity hashes the file contents using a +Merkle tree to produce the "file measurement" which cryptographically +identifies the file contents. This algorithm is the same for all +filesystems that support fs-verity. + +Userspace only needs to be aware of this algorithm if it needs to +compute the file measurement itself, e.g. in order to sign the file. + +.. _fsverity_merkle_tree: + +Merkle tree +----------- + +The file contents is divided into blocks, where the block size is +configurable but is usually 4096 bytes. The end of the last block is +zero-padded if needed. Each block is then hashed, producing the first +level of hashes. Then, the hashes in this first level are grouped +into 'blocksize'-byte blocks (zero-padding the ends as needed) and +these blocks are hashed, producing the second level of hashes. This +proceeds up the tree until only a single block remains. The hash of +this block is the "Merkle tree root hash". + +If the file fits in one block and is nonempty, then the "Merkle tree +root hash" is simply the hash of the single data block. If the file +is empty, then the "Merkle tree root hash" is all zeroes. + +The "blocks" here are not necessarily the same as "filesystem blocks". + +If a salt was specified, then it's zero-padded to the closest multiple +of the input size of the hash algorithm's compression function, e.g. +64 bytes for SHA-256 or 128 bytes for SHA-512. The padded salt is +prepended to every data or Merkle tree block that is hashed. + +The purpose of the block padding is to cause every hash to be taken +over the same amount of data, which simplifies the implementation and +keeps open more possibilities for hardware acceleration. The purpose +of the salt padding is to make the salting "free" when the salted hash +state is precomputed, then imported for each hash. + +Example: in the recommended configuration of SHA-256 and 4K blocks, +128 hash values fit in each block. Thus, each level of the Merkle +tree is approximately 128 times smaller than the previous, and for +large files the Merkle tree's size converges to approximately 1/127 of +the original file size. However, for small files, the padding is +significant, making the space overhead proportionally more. + +.. _fsverity_descriptor: + +fs-verity descriptor +-------------------- + +By itself, the Merkle tree root hash is ambiguous. For example, it +can't a distinguish a large file from a small second file whose data +is exactly the top-level hash block of the first file. Ambiguities +also arise from the convention of padding to the next block boundary. + +To solve this problem, the verity file measurement is actually +computed as a hash of the following structure, which contains the +Merkle tree root hash as well as other fields such as the file size:: + + struct fsverity_descriptor { + __u8 version; /* must be 1 */ + __u8 hash_algorithm; /* Merkle tree hash algorithm */ + __u8 log_blocksize; /* log2 of size of data and tree blocks */ + __u8 salt_size; /* size of salt in bytes; 0 if none */ + __le32 sig_size; /* must be 0 */ + __le64 data_size; /* size of file the Merkle tree is built over */ + __u8 root_hash[64]; /* Merkle tree root hash */ + __u8 salt[32]; /* salt prepended to each hashed block */ + __u8 __reserved[144]; /* must be 0's */ + }; + +Note that the ``sig_size`` field must be set to 0 for the purpose of +computing the file measurement, even if a signature was provided (or +will be provided) to `FS_IOC_ENABLE_VERITY`_. + +Built-in signature verification +=============================== + +With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting +a portion of an authentication policy (see `Use cases`_) in the +kernel. Specifically, it adds support for: + +1. At fs-verity module initialization time, a keyring ".fs-verity" is + created. The root user can add trusted X.509 certificates to this + keyring using the add_key() system call, then (when done) + optionally use keyctl_restrict_keyring() to prevent additional + certificates from being added. + +2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted + detached signature in DER format of the file measurement. On + success, this signature is persisted alongside the Merkle tree. + Then, any time the file is opened, the kernel will verify the + file's actual measurement against this signature, using the + certificates in the ".fs-verity" keyring. + +3. A new sysctl "fs.verity.require_signatures" is made available. + When set to 1, the kernel requires that all verity files have a + correctly signed file measurement as described in (2). + +File measurements must be signed in the following format, which is +similar to the structure used by `FS_IOC_MEASURE_VERITY`_:: + + struct fsverity_signed_digest { + char magic[8]; /* must be "FSVerity" */ + __le16 digest_algorithm; + __le16 digest_size; + __u8 digest[]; + }; + +fs-verity's built-in signature verification support is meant as a +relatively simple mechanism that can be used to provide some level of +authenticity protection for verity files, as an alternative to doing +the signature verification in userspace or using IMA-appraisal. +However, with this mechanism, userspace programs still need to check +that the verity bit is set, and there is no protection against verity +files being swapped around. + +Filesystem support +================== + +fs-verity is currently supported by the ext4 and f2fs filesystems. +The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity +on either filesystem. + +``include/linux/fsverity.h`` declares the interface between the +``fs/verity/`` support layer and filesystems. Briefly, filesystems +must provide an ``fsverity_operations`` structure that provides +methods to read and write the verity metadata to a filesystem-specific +location, including the Merkle tree blocks and +``fsverity_descriptor``. Filesystems must also call functions in +``fs/verity/`` at certain times, such as when a file is opened or when +pages have been read into the pagecache. (See `Verifying data`_.) + +ext4 +---- + +ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2. + +To create verity files on an ext4 filesystem, the filesystem must have +been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on +it. "verity" is an RO_COMPAT filesystem feature, so once set, old +kernels will only be able to mount the filesystem readonly, and old +versions of e2fsck will be unable to check the filesystem. Moreover, +currently ext4 only supports mounting a filesystem with the "verity" +feature when its block size is equal to PAGE_SIZE (often 4096 bytes). + +ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It +can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared. + +ext4 also supports encryption, which can be used simultaneously with +fs-verity. In this case, the plaintext data is verified rather than +the ciphertext. This is necessary in order to make the file +measurement meaningful, since every file is encrypted differently. + +ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) +past the end of the file, starting at the first 64K boundary beyond +i_size. This approach works because (a) verity files are readonly, +and (b) pages fully beyond i_size aren't visible to userspace but can +be read/written internally by ext4 with only some relatively small +changes to ext4. This approach avoids having to depend on the +EA_INODE feature and on rearchitecturing ext4's xattr support to +support paging multi-gigabyte xattrs into memory, and to support +encrypting xattrs. Note that the verity metadata *must* be encrypted +when the file is, since it contains hashes of the plaintext data. + +Currently, ext4 verity only supports the case where the Merkle tree +block size, filesystem block size, and page size are all the same. It +also only supports extent-based files. + +f2fs +---- + +f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0. + +To create verity files on an f2fs filesystem, the filesystem must have +been formatted with ``-O verity``. + +f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files. +It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be +cleared. + +Like ext4, f2fs stores the verity metadata (Merkle tree and +fsverity_descriptor) past the end of the file, starting at the first +64K boundary beyond i_size. See explanation for ext4 above. +Moreover, f2fs supports at most 4096 bytes of xattr entries per inode +which wouldn't be enough for even a single Merkle tree block. + +Currently, f2fs verity only supports a Merkle tree block size of 4096. +Also, f2fs doesn't support enabling verity on files that currently +have atomic or volatile writes pending. + +Implementation details +====================== + +Verifying data +-------------- + +fs-verity ensures that all reads of a verity file's data are verified, +regardless of which syscall is used to do the read (e.g. mmap(), +read(), pread()) and regardless of whether it's the first read or a +later read (unless the later read can return cached data that was +already verified). Below, we describe how filesystems implement this. + +Pagecache +~~~~~~~~~ + +For filesystems using Linux's pagecache, the ``->readpage()`` and +``->readpages()`` methods must be modified to verify pages before they +are marked Uptodate. Merely hooking ``->read_iter()`` would be +insufficient, since ``->read_iter()`` is not used for memory maps. + +Therefore, fs/verity/ provides a function fsverity_verify_page() which +verifies a page that has been read into the pagecache of a verity +inode, but is still locked and not Uptodate, so it's not yet readable +by userspace. As needed to do the verification, +fsverity_verify_page() will call back into the filesystem to read +Merkle tree pages via fsverity_operations::read_merkle_tree_page(). + +fsverity_verify_page() returns false if verification failed; in this +case, the filesystem must not set the page Uptodate. Following this, +as per the usual Linux pagecache behavior, attempts by userspace to +read() from the part of the file containing the page will fail with +EIO, and accesses to the page within a memory map will raise SIGBUS. + +fsverity_verify_page() currently only supports the case where the +Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes). + +In principle, fsverity_verify_page() verifies the entire path in the +Merkle tree from the data page to the root hash. However, for +efficiency the filesystem may cache the hash pages. Therefore, +fsverity_verify_page() only ascends the tree reading hash pages until +an already-verified hash page is seen, as indicated by the PageChecked +bit being set. It then verifies the path to that page. + +This optimization, which is also used by dm-verity, results in +excellent sequential read performance. This is because usually (e.g. +127 in 128 times for 4K blocks and SHA-256) the hash page from the +bottom level of the tree will already be cached and checked from +reading a previous data page. However, random reads perform worse. + +Block device based filesystems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Block device based filesystems (e.g. ext4 and f2fs) in Linux also use +the pagecache, so the above subsection applies too. However, they +also usually read many pages from a file at once, grouped into a +structure called a "bio". To make it easier for these types of +filesystems to support fs-verity, fs/verity/ also provides a function +fsverity_verify_bio() which verifies all pages in a bio. + +ext4 and f2fs also support encryption. If a verity file is also +encrypted, the pages must be decrypted before being verified. To +support this, these filesystems allocate a "post-read context" for +each bio and store it in ``->bi_private``:: + + struct bio_post_read_ctx { + struct bio *bio; + struct work_struct work; + unsigned int cur_step; + unsigned int enabled_steps; + }; + +``enabled_steps`` is a bitmask that specifies whether decryption, +verity, or both is enabled. After the bio completes, for each needed +postprocessing step the filesystem enqueues the bio_post_read_ctx on a +workqueue, and then the workqueue work does the decryption or +verification. Finally, pages where no decryption or verity error +occurred are marked Uptodate, and the pages are unlocked. + +Files on ext4 and f2fs may contain holes. Normally, ``->readpages()`` +simply zeroes holes and sets the corresponding pages Uptodate; no bios +are issued. To prevent this case from bypassing fs-verity, these +filesystems use fsverity_verify_page() to verify hole pages. + +ext4 and f2fs disable direct I/O on verity files, since otherwise +direct I/O would bypass fs-verity. (They also do the same for +encrypted files.) + +Userspace utility +================= + +This document focuses on the kernel, but a userspace utility for +fs-verity can be found at: + + https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git + +See the README.md file in the fsverity-utils source tree for details, +including examples of setting up fs-verity protected files. + +Tests +===== + +To test fs-verity, use xfstests. For example, using `kvm-xfstests +`_:: + + kvm-xfstests -c ext4,f2fs -g verity + +FAQ +=== + +This section answers frequently asked questions about fs-verity that +weren't already directly answered in other parts of this document. + +:Q: Why isn't fs-verity part of IMA? +:A: fs-verity and IMA (Integrity Measurement Architecture) have + different focuses. fs-verity is a filesystem-level mechanism for + hashing individual files using a Merkle tree. In contrast, IMA + specifies a system-wide policy that specifies which files are + hashed and what to do with those hashes, such as log them, + authenticate them, or add them to a measurement list. + + IMA is planned to support the fs-verity hashing mechanism as an + alternative to doing full file hashes, for people who want the + performance and security benefits of the Merkle tree based hash. + But it doesn't make sense to force all uses of fs-verity to be + through IMA. As a standalone filesystem feature, fs-verity + already meets many users' needs, and it's testable like other + filesystem features e.g. with xfstests. + +:Q: Isn't fs-verity useless because the attacker can just modify the + hashes in the Merkle tree, which is stored on-disk? +:A: To verify the authenticity of an fs-verity file you must verify + the authenticity of the "file measurement", which is basically the + root hash of the Merkle tree. See `Use cases`_. + +:Q: Isn't fs-verity useless because the attacker can just replace a + verity file with a non-verity one? +:A: See `Use cases`_. In the initial use case, it's really trusted + userspace code that authenticates the files; fs-verity is just a + tool to do this job efficiently and securely. The trusted + userspace code will consider non-verity files to be inauthentic. + +:Q: Why does the Merkle tree need to be stored on-disk? Couldn't you + store just the root hash? +:A: If the Merkle tree wasn't stored on-disk, then you'd have to + compute the entire tree when the file is first accessed, even if + just one byte is being read. This is a fundamental consequence of + how Merkle tree hashing works. To verify a leaf node, you need to + verify the whole path to the root hash, including the root node + (the thing which the root hash is a hash of). But if the root + node isn't stored on-disk, you have to compute it by hashing its + children, and so on until you've actually hashed the entire file. + + That defeats most of the point of doing a Merkle tree-based hash, + since if you have to hash the whole file ahead of time anyway, + then you could simply do sha256(file) instead. That would be much + simpler, and a bit faster too. + + It's true that an in-memory Merkle tree could still provide the + advantage of verification on every read rather than just on the + first read. However, it would be inefficient because every time a + hash page gets evicted (you can't pin the entire Merkle tree into + memory, since it may be very large), in order to restore it you + again need to hash everything below it in the tree. This again + defeats most of the point of doing a Merkle tree-based hash, since + a single block read could trigger re-hashing gigabytes of data. + +:Q: But couldn't you store just the leaf nodes and compute the rest? +:A: See previous answer; this really just moves up one level, since + one could alternatively interpret the data blocks as being the + leaf nodes of the Merkle tree. It's true that the tree can be + computed much faster if the leaf level is stored rather than just + the data, but that's only because each level is less than 1% the + size of the level below (assuming the recommended settings of + SHA-256 and 4K blocks). For the exact same reason, by storing + "just the leaf nodes" you'd already be storing over 99% of the + tree, so you might as well simply store the whole tree. + +:Q: Can the Merkle tree be built ahead of time, e.g. distributed as + part of a package that is installed to many computers? +:A: This isn't currently supported. It was part of the original + design, but was removed to simplify the kernel UAPI and because it + wasn't a critical use case. Files are usually installed once and + used many times, and cryptographic hashing is somewhat fast on + most modern processors. + +:Q: Why doesn't fs-verity support writes? +:A: Write support would be very difficult and would require a + completely different design, so it's well outside the scope of + fs-verity. Write support would require: + + - A way to maintain consistency between the data and hashes, + including all levels of hashes, since corruption after a crash + (especially of potentially the entire file!) is unacceptable. + The main options for solving this are data journalling, + copy-on-write, and log-structured volume. But it's very hard to + retrofit existing filesystems with new consistency mechanisms. + Data journalling is available on ext4, but is very slow. + + - Rebuilding the the Merkle tree after every write, which would be + extremely inefficient. Alternatively, a different authenticated + dictionary structure such as an "authenticated skiplist" could + be used. However, this would be far more complex. + + Compare it to dm-verity vs. dm-integrity. dm-verity is very + simple: the kernel just verifies read-only data against a + read-only Merkle tree. In contrast, dm-integrity supports writes + but is slow, is much more complex, and doesn't actually support + full-device authentication since it authenticates each sector + independently, i.e. there is no "root hash". It doesn't really + make sense for the same device-mapper target to support these two + very different cases; the same applies to fs-verity. + +:Q: Since verity files are immutable, why isn't the immutable bit set? +:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a + specific set of semantics which not only make the file contents + read-only, but also prevent the file from being deleted, renamed, + linked to, or having its owner or mode changed. These extra + properties are unwanted for fs-verity, so reusing the immutable + bit isn't appropriate. + +:Q: Why does the API use ioctls instead of setxattr() and getxattr()? +:A: Abusing the xattr interface for basically arbitrary syscalls is + heavily frowned upon by most of the Linux filesystem developers. + An xattr should really just be an xattr on-disk, not an API to + e.g. magically trigger construction of a Merkle tree. + +:Q: Does fs-verity support remote filesystems? +:A: Only ext4 and f2fs support is implemented currently, but in + principle any filesystem that can store per-file verity metadata + can support fs-verity, regardless of whether it's local or remote. + Some filesystems may have fewer options of where to store the + verity metadata; one possibility is to store it past the end of + the file and "hide" it from userspace by manipulating i_size. The + data verification functions provided by ``fs/verity/`` also assume + that the filesystem uses the Linux pagecache, but both local and + remote filesystems normally do so. + +:Q: Why is anything filesystem-specific at all? Shouldn't fs-verity + be implemented entirely at the VFS level? +:A: There are many reasons why this is not possible or would be very + difficult, including the following: + + - To prevent bypassing verification, pages must not be marked + Uptodate until they've been verified. Currently, each + filesystem is responsible for marking pages Uptodate via + ``->readpages()``. Therefore, currently it's not possible for + the VFS to do the verification on its own. Changing this would + require significant changes to the VFS and all filesystems. + + - It would require defining a filesystem-independent way to store + the verity metadata. Extended attributes don't work for this + because (a) the Merkle tree may be gigabytes, but many + filesystems assume that all xattrs fit into a single 4K + filesystem block, and (b) ext4 and f2fs encryption doesn't + encrypt xattrs, yet the Merkle tree *must* be encrypted when the + file contents are, because it stores hashes of the plaintext + file contents. + + So the verity metadata would have to be stored in an actual + file. Using a separate file would be very ugly, since the + metadata is fundamentally part of the file to be protected, and + it could cause problems where users could delete the real file + but not the metadata file or vice versa. On the other hand, + having it be in the same file would break applications unless + filesystems' notion of i_size were divorced from the VFS's, + which would be complex and require changes to all filesystems. + + - It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's + transaction mechanism so that either the file ends up with + verity enabled, or no changes were made. Allowing intermediate + states to occur after a crash may cause problems. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 96653ebefd7e..fd2bcf99cda0 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -36,3 +36,4 @@ filesystem implementations. journalling fscrypt + fsverity diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst index 7f8dcae7a230..bef79cd4c6b4 100644 --- a/Documentation/ioctl/ioctl-number.rst +++ b/Documentation/ioctl/ioctl-number.rst @@ -233,6 +233,7 @@ Code Seq# Include File Comments 'f' 00-0F fs/ext4/ext4.h conflict! 'f' 00-0F linux/fs.h conflict! 'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict! +'f' 81-8F linux/fsverity.h 'g' 00-0F linux/usb/gadgetfs.h 'g' 20-2F linux/usb/g_printer.h 'h' 00-7F conflict! Charon filesystem diff --git a/MAINTAINERS b/MAINTAINERS index ce6113999cf8..b2326dece28e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6694,6 +6694,18 @@ S: Maintained F: fs/notify/ F: include/linux/fsnotify*.h +FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION +M: Eric Biggers +M: Theodore Y. Ts'o +L: linux-fscrypt@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-fscrypt/list/ +T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity +S: Supported +F: fs/verity/ +F: include/linux/fsverity.h +F: include/uapi/linux/fsverity.h +F: Documentation/filesystems/fsverity.rst + FUJITSU LAPTOP EXTRAS M: Jonathan Woithe L: platform-driver-x86@vger.kernel.org diff --git a/fs/Kconfig b/fs/Kconfig index 669d46550e6d..2501e6f1f965 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -112,6 +112,8 @@ config MANDATORY_FILE_LOCKING source "fs/crypto/Kconfig" +source "fs/verity/Kconfig" + source "fs/notify/Kconfig" source "fs/quota/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index b2e4973a0bea..14231b4cf383 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_IO_URING) += io_uring.o obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FS_ENCRYPTION) += crypto/ +obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8fdfcd3c3e04..b17ddc229ac5 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o +ext4-$(CONFIG_FS_VERITY) += verity.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf660aa7a9e0..9c7f4036021b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -41,6 +41,7 @@ #endif #include +#include #include @@ -395,6 +396,7 @@ struct flex_groups { #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ @@ -402,7 +404,7 @@ struct flex_groups { #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */ +#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ @@ -467,6 +469,7 @@ enum { EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ + EXT4_INODE_VERITY = 20, /* Verity protected inode */ EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ @@ -512,6 +515,7 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(TOPDIR); CHECK_FLAG_VALUE(HUGE_FILE); CHECK_FLAG_VALUE(EXTENTS); + CHECK_FLAG_VALUE(VERITY); CHECK_FLAG_VALUE(EA_INODE); CHECK_FLAG_VALUE(EOFBLOCKS); CHECK_FLAG_VALUE(INLINE_DATA); @@ -1560,6 +1564,7 @@ enum { EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ + EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ }; #define EXT4_INODE_BIT_FNS(name, field, offset) \ @@ -1610,6 +1615,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_SB(sb) (sb) #endif +static inline bool ext4_verity_in_progress(struct inode *inode) +{ + return IS_ENABLED(CONFIG_FS_VERITY) && + ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); +} + #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime /* @@ -1662,6 +1673,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 #define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 +#define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -1756,6 +1768,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC) EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM) EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY) EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT) +EXT4_FEATURE_RO_COMPAT_FUNCS(verity, VERITY) EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION) EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE) @@ -1813,7 +1826,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD) EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ EXT4_FEATURE_RO_COMPAT_QUOTA |\ - EXT4_FEATURE_RO_COMPAT_PROJECT) + EXT4_FEATURE_RO_COMPAT_PROJECT |\ + EXT4_FEATURE_RO_COMPAT_VERITY) #define EXTN_FEATURE_FUNCS(ver) \ static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ @@ -3177,6 +3191,8 @@ static inline void ext4_set_de_type(struct super_block *sb, extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages, bool is_readahead); +extern int __init ext4_init_post_read_processing(void); +extern void ext4_exit_post_read_processing(void); /* symlink.c */ extern const struct inode_operations ext4_encrypted_symlink_inode_operations; @@ -3283,6 +3299,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); +/* verity.c */ +extern const struct fsverity_operations ext4_verityops; + /* * Add new method to test whether block and inode bitmaps are properly * initialized. With uninit_bg reading the block from disk is not enough diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 70b0438dbc94..b8a20bb9a145 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -457,6 +457,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ret) return ret; + ret = fsverity_file_open(inode, filp); + if (ret) + return ret; + /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 006b7a2070bf..d0dc0e3463db 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1340,6 +1340,9 @@ retry_journal: } if (ret) { + bool extended = (pos + len > inode->i_size) && + !ext4_verity_in_progress(inode); + unlock_page(page); /* * __block_write_begin may have instantiated a few blocks @@ -1349,11 +1352,11 @@ retry_journal: * Add inode to orphan list in case we crash before * truncate finishes */ - if (pos + len > inode->i_size && ext4_can_truncate(inode)) + if (extended && ext4_can_truncate(inode)) ext4_orphan_add(handle, inode); ext4_journal_stop(handle); - if (pos + len > inode->i_size) { + if (extended) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might @@ -1406,6 +1409,7 @@ static int ext4_write_end(struct file *file, int ret = 0, ret2; int i_size_changed = 0; int inline_data = ext4_has_inline_data(inode); + bool verity = ext4_verity_in_progress(inode); trace_ext4_write_end(inode, pos, len, copied); if (inline_data) { @@ -1423,12 +1427,16 @@ static int ext4_write_end(struct file *file, /* * it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. + * + * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree + * blocks are being written past EOF, so skip the i_size update. */ - i_size_changed = ext4_update_inode_size(inode, pos + copied); + if (!verity) + i_size_changed = ext4_update_inode_size(inode, pos + copied); unlock_page(page); put_page(page); - if (old_size < pos) + if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); /* * Don't mark the inode dirty under page lock. First, it unnecessarily @@ -1439,7 +1447,7 @@ static int ext4_write_end(struct file *file, if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); - if (pos + len > inode->i_size && ext4_can_truncate(inode)) + if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1450,7 +1458,7 @@ errout: if (!ret) ret = ret2; - if (pos + len > inode->i_size) { + if (pos + len > inode->i_size && !verity) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be @@ -1511,6 +1519,7 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; int size_changed = 0; int inline_data = ext4_has_inline_data(inode); + bool verity = ext4_verity_in_progress(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); @@ -1540,13 +1549,14 @@ static int ext4_journalled_write_end(struct file *file, if (!partial) SetPageUptodate(page); } - size_changed = ext4_update_inode_size(inode, pos + copied); + if (!verity) + size_changed = ext4_update_inode_size(inode, pos + copied); ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; unlock_page(page); put_page(page); - if (old_size < pos) + if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); if (size_changed || inline_data) { @@ -1555,7 +1565,7 @@ static int ext4_journalled_write_end(struct file *file, ret = ret2; } - if (pos + len > inode->i_size && ext4_can_truncate(inode)) + if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1566,7 +1576,7 @@ errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - if (pos + len > inode->i_size) { + if (pos + len > inode->i_size && !verity) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be @@ -2162,7 +2172,8 @@ static int ext4_writepage(struct page *page, trace_ext4_writepage(page); size = i_size_read(inode); - if (page->index == size >> PAGE_SHIFT) + if (page->index == size >> PAGE_SHIFT && + !ext4_verity_in_progress(inode)) len = size & ~PAGE_MASK; else len = PAGE_SIZE; @@ -2246,7 +2257,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) * after page tables are updated. */ size = i_size_read(mpd->inode); - if (page->index == size >> PAGE_SHIFT) + if (page->index == size >> PAGE_SHIFT && + !ext4_verity_in_progress(mpd->inode)) len = size & ~PAGE_MASK; else len = PAGE_SIZE; @@ -2345,6 +2357,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) >> inode->i_blkbits; + if (ext4_verity_in_progress(inode)) + blocks = EXT_MAX_BLOCKS; + do { BUG_ON(buffer_locked(bh)); @@ -3061,8 +3076,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, index = pos >> PAGE_SHIFT; - if (ext4_nonda_switch(inode->i_sb) || - S_ISLNK(inode->i_mode)) { + if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) || + ext4_verity_in_progress(inode)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; return ext4_write_begin(file, mapping, pos, len, flags, pagep, fsdata); @@ -3897,6 +3912,8 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) return 0; #endif + if (fsverity_active(inode)) + return 0; /* * If we are doing data journalling we don't support O_DIRECT @@ -4736,6 +4753,8 @@ static bool ext4_should_use_dax(struct inode *inode) return false; if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT)) return false; + if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY)) + return false; return true; } @@ -4760,9 +4779,11 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_ENCRYPTED; if (flags & EXT4_CASEFOLD_FL) new_fl |= S_CASEFOLD; + if (flags & EXT4_VERITY_FL) + new_fl |= S_VERITY; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| - S_ENCRYPTED|S_CASEFOLD); + S_ENCRYPTED|S_CASEFOLD|S_VERITY); } static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, @@ -5552,6 +5573,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + error = fsverity_prepare_setattr(dentry, attr); + if (error) + return error; + if (is_quota_modification(inode, attr)) { error = dquot_initialize(inode); if (error) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5703d607f5af..5444d49cbf09 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1198,6 +1198,17 @@ out: } case EXT4_IOC_SHUTDOWN: return ext4_shutdown(sb, arg); + + case FS_IOC_ENABLE_VERITY: + if (!ext4_has_feature_verity(sb)) + return -EOPNOTSUPP; + return fsverity_ioctl_enable(filp, (const void __user *)arg); + + case FS_IOC_MEASURE_VERITY: + if (!ext4_has_feature_verity(sb)) + return -EOPNOTSUPP; + return fsverity_ioctl_measure(filp, (void __user *)arg); + default: return -ENOTTY; } @@ -1265,6 +1276,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: + case FS_IOC_ENABLE_VERITY: + case FS_IOC_MEASURE_VERITY: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index c916017db334..a30b203fa461 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -47,13 +47,103 @@ #include "ext4.h" -static inline bool ext4_bio_encrypted(struct bio *bio) +#define NUM_PREALLOC_POST_READ_CTXS 128 + +static struct kmem_cache *bio_post_read_ctx_cache; +static mempool_t *bio_post_read_ctx_pool; + +/* postprocessing steps for read bios */ +enum bio_post_read_step { + STEP_INITIAL = 0, + STEP_DECRYPT, + STEP_VERITY, +}; + +struct bio_post_read_ctx { + struct bio *bio; + struct work_struct work; + unsigned int cur_step; + unsigned int enabled_steps; +}; + +static void __read_end_io(struct bio *bio) { -#ifdef CONFIG_FS_ENCRYPTION - return unlikely(bio->bi_private != NULL); -#else - return false; -#endif + struct page *page; + struct bio_vec *bv; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bv, bio, iter_all) { + page = bv->bv_page; + + /* PG_error was set if any post_read step failed */ + if (bio->bi_status || PageError(page)) { + ClearPageUptodate(page); + /* will re-read again later */ + ClearPageError(page); + } else { + SetPageUptodate(page); + } + unlock_page(page); + } + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx); + +static void decrypt_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fscrypt_decrypt_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + +static void verity_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fsverity_verify_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx) +{ + /* + * We use different work queues for decryption and for verity because + * verity may require reading metadata pages that need decryption, and + * we shouldn't recurse to the same workqueue. + */ + switch (++ctx->cur_step) { + case STEP_DECRYPT: + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { + INIT_WORK(&ctx->work, decrypt_work); + fscrypt_enqueue_decrypt_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ + case STEP_VERITY: + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ + default: + __read_end_io(ctx->bio); + } +} + +static bool bio_post_read_required(struct bio *bio) +{ + return bio->bi_private && !bio->bi_status; } /* @@ -70,30 +160,53 @@ static inline bool ext4_bio_encrypted(struct bio *bio) */ static void mpage_end_io(struct bio *bio) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + if (bio_post_read_required(bio)) { + struct bio_post_read_ctx *ctx = bio->bi_private; - if (ext4_bio_encrypted(bio)) { - if (bio->bi_status) { - fscrypt_release_ctx(bio->bi_private); - } else { - fscrypt_enqueue_decrypt_bio(bio->bi_private, bio); - return; - } + ctx->cur_step = STEP_INITIAL; + bio_post_read_processing(ctx); + return; } - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + __read_end_io(bio); +} - if (!bio->bi_status) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); +static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + +static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode, + struct bio *bio, + pgoff_t first_idx) +{ + unsigned int post_read_steps = 0; + struct bio_post_read_ctx *ctx = NULL; + + if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) + post_read_steps |= 1 << STEP_DECRYPT; + + if (ext4_need_verity(inode, first_idx)) + post_read_steps |= 1 << STEP_VERITY; + + if (post_read_steps) { + ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); + if (!ctx) + return ERR_PTR(-ENOMEM); + ctx->bio = bio; + ctx->enabled_steps = post_read_steps; + bio->bi_private = ctx; } + return ctx; +} - bio_put(bio); +static inline loff_t ext4_readpage_limit(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_FS_VERITY) && + (IS_VERITY(inode) || ext4_verity_in_progress(inode))) + return inode->i_sb->s_maxbytes; + + return i_size_read(inode); } int ext4_mpage_readpages(struct address_space *mapping, @@ -141,7 +254,8 @@ int ext4_mpage_readpages(struct address_space *mapping, block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + last_block_in_file = (ext4_readpage_limit(inode) + + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; @@ -218,6 +332,9 @@ int ext4_mpage_readpages(struct address_space *mapping, zero_user_segment(page, first_hole << blkbits, PAGE_SIZE); if (first_hole == 0) { + if (ext4_need_verity(inode, page->index) && + !fsverity_verify_page(page)) + goto set_error_page; SetPageUptodate(page); unlock_page(page); goto next_page; @@ -241,18 +358,16 @@ int ext4_mpage_readpages(struct address_space *mapping, bio = NULL; } if (bio == NULL) { - struct fscrypt_ctx *ctx = NULL; + struct bio_post_read_ctx *ctx; - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(GFP_NOFS); - if (IS_ERR(ctx)) - goto set_error_page; - } bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + if (!bio) + goto set_error_page; + ctx = get_bio_post_read_ctx(inode, bio, page->index); + if (IS_ERR(ctx)) { + bio_put(bio); + bio = NULL; goto set_error_page; } bio_set_dev(bio, bdev); @@ -293,3 +408,29 @@ int ext4_mpage_readpages(struct address_space *mapping, submit_bio(bio); return 0; } + +int __init ext4_init_post_read_processing(void) +{ + bio_post_read_ctx_cache = + kmem_cache_create("ext4_bio_post_read_ctx", + sizeof(struct bio_post_read_ctx), 0, 0, NULL); + if (!bio_post_read_ctx_cache) + goto fail; + bio_post_read_ctx_pool = + mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, + bio_post_read_ctx_cache); + if (!bio_post_read_ctx_pool) + goto fail_free_cache; + return 0; + +fail_free_cache: + kmem_cache_destroy(bio_post_read_ctx_cache); +fail: + return -ENOMEM; +} + +void ext4_exit_post_read_processing(void) +{ + mempool_destroy(bio_post_read_ctx_pool); + kmem_cache_destroy(bio_post_read_ctx_cache); +} diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 757819139b8f..27cd622676e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1182,6 +1182,7 @@ void ext4_clear_inode(struct inode *inode) EXT4_I(inode)->jinode = NULL; } fscrypt_put_encryption_info(inode); + fsverity_cleanup_inode(inode); } static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -4275,6 +4276,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; #endif +#ifdef CONFIG_FS_VERITY + sb->s_vop = &ext4_verityops; +#endif #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (ext4_has_feature_quota(sb)) @@ -4422,6 +4426,11 @@ no_journal: goto failed_mount_wq; } + if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { + ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); + goto failed_mount_wq; + } + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && !ext4_has_feature_encrypt(sb)) { ext4_set_feature_encrypt(sb); @@ -6097,6 +6106,10 @@ static int __init ext4_init_fs(void) return err; err = ext4_init_pending(); + if (err) + goto out7; + + err = ext4_init_post_read_processing(); if (err) goto out6; @@ -6138,8 +6151,10 @@ out3: out4: ext4_exit_pageio(); out5: - ext4_exit_pending(); + ext4_exit_post_read_processing(); out6: + ext4_exit_pending(); +out7: ext4_exit_es(); return err; @@ -6156,6 +6171,7 @@ static void __exit ext4_exit_fs(void) ext4_exit_sysfs(); ext4_exit_system_zone(); ext4_exit_pageio(); + ext4_exit_post_read_processing(); ext4_exit_es(); ext4_exit_pending(); } diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b3cd7655a6ff..eb1efad0e20a 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -242,6 +242,9 @@ EXT4_ATTR_FEATURE(encryption); #ifdef CONFIG_UNICODE EXT4_ATTR_FEATURE(casefold); #endif +#ifdef CONFIG_FS_VERITY +EXT4_ATTR_FEATURE(verity); +#endif EXT4_ATTR_FEATURE(metadata_csum_seed); static struct attribute *ext4_feat_attrs[] = { @@ -253,6 +256,9 @@ static struct attribute *ext4_feat_attrs[] = { #endif #ifdef CONFIG_UNICODE ATTR_LIST(casefold), +#endif +#ifdef CONFIG_FS_VERITY + ATTR_LIST(verity), #endif ATTR_LIST(metadata_csum_seed), NULL, diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c new file mode 100644 index 000000000000..d0d8a9795dd6 --- /dev/null +++ b/fs/ext4/verity.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/ext4/verity.c: fs-verity support for ext4 + * + * Copyright 2019 Google LLC + */ + +/* + * Implementation of fsverity_operations for ext4. + * + * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past + * the end of the file, starting at the first 64K boundary beyond i_size. This + * approach works because (a) verity files are readonly, and (b) pages fully + * beyond i_size aren't visible to userspace but can be read/written internally + * by ext4 with only some relatively small changes to ext4. This approach + * avoids having to depend on the EA_INODE feature and on rearchitecturing + * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and + * to support encrypting xattrs. Note that the verity metadata *must* be + * encrypted when the file is, since it contains hashes of the plaintext data. + * + * Using a 64K boundary rather than a 4K one keeps things ready for + * architectures with 64K pages, and it doesn't necessarily waste space on-disk + * since there can be a hole between i_size and the start of the Merkle tree. + */ + +#include + +#include "ext4.h" +#include "ext4_extents.h" +#include "ext4_jbd2.h" + +static inline loff_t ext4_verity_metadata_pos(const struct inode *inode) +{ + return round_up(inode->i_size, 65536); +} + +/* + * Read some verity metadata from the inode. __vfs_read() can't be used because + * we need to read beyond i_size. + */ +static int pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos) +{ + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *addr; + + page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + addr = kmap_atomic(page); + memcpy(buf, addr + offset_in_page(pos), n); + kunmap_atomic(addr); + + put_page(page); + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. + * kernel_write() can't be used because the file descriptor is readonly. + */ +static int pagecache_write(struct inode *inode, const void *buf, size_t count, + loff_t pos) +{ + if (pos + count > inode->i_sb->s_maxbytes) + return -EFBIG; + + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *fsdata; + void *addr; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, + &page, &fsdata); + if (res) + return res; + + addr = kmap_atomic(page); + memcpy(addr + offset_in_page(pos), buf, n); + kunmap_atomic(addr); + + res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, + page, fsdata); + if (res < 0) + return res; + if (res != n) + return -EIO; + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +static int ext4_begin_enable_verity(struct file *filp) +{ + struct inode *inode = file_inode(filp); + const int credits = 2; /* superblock and inode for ext4_orphan_add() */ + handle_t *handle; + int err; + + if (ext4_verity_in_progress(inode)) + return -EBUSY; + + /* + * Since the file was opened readonly, we have to initialize the jbd + * inode and quotas here and not rely on ->open() doing it. This must + * be done before evicting the inline data. + */ + + err = ext4_inode_attach_jinode(inode); + if (err) + return err; + + err = dquot_initialize(inode); + if (err) + return err; + + err = ext4_convert_inline_data(inode); + if (err) + return err; + + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + ext4_warning_inode(inode, + "verity is only allowed on extent-based files"); + return -EOPNOTSUPP; + } + + /* + * ext4 uses the last allocated block to find the verity descriptor, so + * we must remove any other blocks past EOF which might confuse things. + */ + err = ext4_truncate(inode); + if (err) + return err; + + handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_orphan_add(handle, inode); + if (err == 0) + ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + + ext4_journal_stop(handle); + return err; +} + +/* + * ext4 stores the verity descriptor beginning on the next filesystem block + * boundary after the Merkle tree. Then, the descriptor size is stored in the + * last 4 bytes of the last allocated filesystem block --- which is either the + * block in which the descriptor ends, or the next block after that if there + * weren't at least 4 bytes remaining. + * + * We can't simply store the descriptor in an xattr because it *must* be + * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt + * xattrs. Also, if the descriptor includes a large signature blob it may be + * too large to store in an xattr without the EA_INODE feature. + */ +static int ext4_write_verity_descriptor(struct inode *inode, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) + + merkle_tree_size, i_blocksize(inode)); + const u64 desc_end = desc_pos + desc_size; + const __le32 desc_size_disk = cpu_to_le32(desc_size); + const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk), + i_blocksize(inode)) - + sizeof(desc_size_disk); + int err; + + err = pagecache_write(inode, desc, desc_size, desc_pos); + if (err) + return err; + + return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk), + desc_size_pos); +} + +static int ext4_end_enable_verity(struct file *filp, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + struct inode *inode = file_inode(filp); + const int credits = 2; /* superblock and inode for ext4_orphan_del() */ + handle_t *handle; + int err = 0; + int err2; + + if (desc != NULL) { + /* Succeeded; write the verity descriptor. */ + err = ext4_write_verity_descriptor(inode, desc, desc_size, + merkle_tree_size); + + /* Write all pages before clearing VERITY_IN_PROGRESS. */ + if (!err) + err = filemap_write_and_wait(inode->i_mapping); + } + + /* If we failed, truncate anything we wrote past i_size. */ + if (desc == NULL || err) + ext4_truncate(inode); + + /* + * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and + * deleting the inode from the orphan list, even if something failed. + * If everything succeeded, we'll also set the verity bit in the same + * transaction. + */ + + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + + handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); + if (IS_ERR(handle)) { + ext4_orphan_del(NULL, inode); + return PTR_ERR(handle); + } + + err2 = ext4_orphan_del(handle, inode); + if (err2) + goto out_stop; + + if (desc != NULL && !err) { + struct ext4_iloc iloc; + + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto out_stop; + ext4_set_inode_flag(inode, EXT4_INODE_VERITY); + ext4_set_inode_flags(inode); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + } +out_stop: + ext4_journal_stop(handle); + return err ?: err2; +} + +static int ext4_get_verity_descriptor_location(struct inode *inode, + size_t *desc_size_ret, + u64 *desc_pos_ret) +{ + struct ext4_ext_path *path; + struct ext4_extent *last_extent; + u32 end_lblk; + u64 desc_size_pos; + __le32 desc_size_disk; + u32 desc_size; + u64 desc_pos; + int err; + + /* + * Descriptor size is in last 4 bytes of last allocated block. + * See ext4_write_verity_descriptor(). + */ + + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + EXT4_ERROR_INODE(inode, "verity file doesn't use extents"); + return -EFSCORRUPTED; + } + + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + + last_extent = path[path->p_depth].p_ext; + if (!last_extent) { + EXT4_ERROR_INODE(inode, "verity file has no extents"); + ext4_ext_drop_refs(path); + kfree(path); + return -EFSCORRUPTED; + } + + end_lblk = le32_to_cpu(last_extent->ee_block) + + ext4_ext_get_actual_len(last_extent); + desc_size_pos = (u64)end_lblk << inode->i_blkbits; + ext4_ext_drop_refs(path); + kfree(path); + + if (desc_size_pos < sizeof(desc_size_disk)) + goto bad; + desc_size_pos -= sizeof(desc_size_disk); + + err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk), + desc_size_pos); + if (err) + return err; + desc_size = le32_to_cpu(desc_size_disk); + + /* + * The descriptor is stored just before the desc_size_disk, but starting + * on a filesystem block boundary. + */ + + if (desc_size > INT_MAX || desc_size > desc_size_pos) + goto bad; + + desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode)); + if (desc_pos < ext4_verity_metadata_pos(inode)) + goto bad; + + *desc_size_ret = desc_size; + *desc_pos_ret = desc_pos; + return 0; + +bad: + EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor"); + return -EFSCORRUPTED; +} + +static int ext4_get_verity_descriptor(struct inode *inode, void *buf, + size_t buf_size) +{ + size_t desc_size = 0; + u64 desc_pos = 0; + int err; + + err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos); + if (err) + return err; + + if (buf_size) { + if (desc_size > buf_size) + return -ERANGE; + err = pagecache_read(inode, buf, desc_size, desc_pos); + if (err) + return err; + } + return desc_size; +} + +static struct page *ext4_read_merkle_tree_page(struct inode *inode, + pgoff_t index) +{ + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; + + return read_mapping_page(inode->i_mapping, index, NULL); +} + +static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, + u64 index, int log_blocksize) +{ + loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize); + + return pagecache_write(inode, buf, 1 << log_blocksize, pos); +} + +const struct fsverity_operations ext4_verityops = { + .begin_enable_verity = ext4_begin_enable_verity, + .end_enable_verity = ext4_end_enable_verity, + .get_verity_descriptor = ext4_get_verity_descriptor, + .read_merkle_tree_page = ext4_read_merkle_tree_page, + .write_merkle_tree_block = ext4_write_merkle_tree_block, +}; diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 776c4b936504..2aaecc63834f 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -8,3 +8,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o +f2fs-$(CONFIG_FS_VERITY) += verity.o diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index abbf14e9bd72..54cad80acb7d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -74,6 +74,7 @@ static enum count_type __read_io_type(struct page *page) enum bio_post_read_step { STEP_INITIAL = 0, STEP_DECRYPT, + STEP_VERITY, }; struct bio_post_read_ctx { @@ -120,8 +121,23 @@ static void decrypt_work(struct work_struct *work) bio_post_read_processing(ctx); } +static void verity_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fsverity_verify_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + static void bio_post_read_processing(struct bio_post_read_ctx *ctx) { + /* + * We use different work queues for decryption and for verity because + * verity may require reading metadata pages that need decryption, and + * we shouldn't recurse to the same workqueue. + */ switch (++ctx->cur_step) { case STEP_DECRYPT: if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { @@ -131,6 +147,14 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) } ctx->cur_step++; /* fall-through */ + case STEP_VERITY: + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ default: __read_end_io(ctx->bio); } @@ -608,8 +632,15 @@ out: up_write(&io->io_rwsem); } +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages, unsigned op_flag) + unsigned nr_pages, unsigned op_flag, + pgoff_t first_idx) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -625,6 +656,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; + + if (f2fs_need_verity(inode, first_idx)) + post_read_steps |= 1 << STEP_VERITY; + if (post_read_steps) { ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); if (!ctx) { @@ -646,7 +681,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1569,6 +1604,15 @@ out: return ret; } +static inline loff_t f2fs_readpage_limit(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_FS_VERITY) && + (IS_VERITY(inode) || f2fs_verity_in_progress(inode))) + return inode->i_sb->s_maxbytes; + + return i_size_read(inode); +} + static int f2fs_read_single_page(struct inode *inode, struct page *page, unsigned nr_pages, struct f2fs_map_blocks *map, @@ -1587,7 +1631,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -1632,6 +1676,11 @@ got_it: } else { zero_out: zero_user_segment(page, 0, PAGE_SIZE); + if (f2fs_need_verity(inode, page->index) && + !fsverity_verify_page(page)) { + ret = -EIO; + goto out; + } if (!PageUptodate(page)) SetPageUptodate(page); unlock_page(page); @@ -1650,7 +1699,7 @@ submit_and_realloc: } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0); + is_readahead ? REQ_RAHEAD : 0, page->index); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2052,7 +2101,7 @@ static int __write_data_page(struct page *page, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index) + if (page->index < end_index || f2fs_verity_in_progress(inode)) goto write; /* @@ -2427,7 +2476,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); - if (to > i_size) { + /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ + if (to > i_size && !f2fs_verity_in_progress(inode)) { down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); @@ -2458,7 +2508,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * the block addresses when there is no need to fill the page. */ if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && - !is_inode_flag_set(inode, FI_NO_PREALLOC)) + !is_inode_flag_set(inode, FI_NO_PREALLOC) && + !f2fs_verity_in_progress(inode)) return 0; /* f2fs_lock_op avoids race between write CP and convert_inline_page */ @@ -2597,7 +2648,8 @@ repeat: if (len == PAGE_SIZE || PageUptodate(page)) return 0; - if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { + if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && + !f2fs_verity_in_progress(inode)) { zero_user_segment(page, len, PAGE_SIZE); return 0; } @@ -2660,7 +2712,8 @@ static int f2fs_write_end(struct file *file, set_page_dirty(page); - if (pos + copied > i_size_read(inode)) + if (pos + copied > i_size_read(inode) && + !f2fs_verity_in_progress(inode)) f2fs_i_size_write(inode, pos + copied); unlock_out: f2fs_put_page(page, 1); @@ -3104,7 +3157,9 @@ void f2fs_clear_page_cache_dirty_tag(struct page *page) int __init f2fs_init_post_read_processing(void) { - bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); + bio_post_read_ctx_cache = + kmem_cache_create("f2fs_bio_post_read_ctx", + sizeof(struct bio_post_read_ctx), 0, 0, NULL); if (!bio_post_read_ctx_cache) goto fail; bio_post_read_ctx_pool = diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 17382da7f0bd..7c5f121edac5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -25,6 +25,7 @@ #include #include +#include #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -151,7 +152,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 -#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_VERITY 0x0400 #define F2FS_FEATURE_SB_CHKSUM 0x0800 #define __F2FS_HAS_FEATURE(raw_super, mask) \ @@ -630,7 +631,7 @@ enum { #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 #define FADVISE_HOT_BIT 0x20 -#define FADVISE_VERITY_BIT 0x40 /* reserved */ +#define FADVISE_VERITY_BIT 0x40 #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) @@ -650,6 +651,8 @@ enum { #define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) #define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) #define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) +#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) +#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) #define DEF_DIR_LEVEL 0 @@ -2412,6 +2415,7 @@ enum { FI_PROJ_INHERIT, /* indicate file inherits projectid */ FI_PIN_FILE, /* indicate file should not be gced */ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2451,6 +2455,12 @@ static inline void clear_inode_flag(struct inode *inode, int flag) __mark_inode_dirty_flag(inode, flag, false); } +static inline bool f2fs_verity_in_progress(struct inode *inode) +{ + return IS_ENABLED(CONFIG_FS_VERITY) && + is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS); +} + static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; @@ -3521,6 +3531,9 @@ void f2fs_exit_sysfs(void); int f2fs_register_sysfs(struct f2fs_sb_info *sbi); void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); +/* verity.c */ +extern const struct fsverity_operations f2fs_verityops; + /* * crypto support */ @@ -3543,7 +3556,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) */ static inline bool f2fs_post_read_required(struct inode *inode) { - return f2fs_encrypted_file(inode); + return f2fs_encrypted_file(inode) || fsverity_active(inode); } #define F2FS_FEATURE_FUNCS(name, flagname) \ @@ -3561,6 +3574,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(verity, VERITY); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6a7349f9ac15..39fffc19e00c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -493,6 +493,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) { int err = fscrypt_file_open(inode, filp); + if (err) + return err; + + err = fsverity_file_open(inode, filp); if (err) return err; @@ -778,6 +782,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; + err = fsverity_prepare_setattr(dentry, attr); + if (err) + return err; + if (is_quota_modification(inode, attr)) { err = dquot_initialize(inode); if (err) @@ -1705,7 +1713,8 @@ static const struct { FS_PROJINHERIT_FL | \ FS_ENCRYPT_FL | \ FS_INLINE_DATA_FL | \ - FS_NOCOW_FL) + FS_NOCOW_FL | \ + FS_VERITY_FL) #define F2FS_SETTABLE_FS_FL ( \ FS_SYNC_FL | \ @@ -1750,6 +1759,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) if (IS_ENCRYPTED(inode)) fsflags |= FS_ENCRYPT_FL; + if (IS_VERITY(inode)) + fsflags |= FS_VERITY_FL; if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) fsflags |= FS_INLINE_DATA_FL; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3103,6 +3114,30 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) return ret; } +static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + + if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { + f2fs_warn(F2FS_I_SB(inode), + "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n", + inode->i_ino); + return -EOPNOTSUPP; + } + + return fsverity_ioctl_enable(filp, (const void __user *)arg); +} + +static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fsverity_ioctl_measure(filp, (void __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3171,6 +3206,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_precache_extents(filp, arg); case F2FS_IOC_RESIZE_FS: return f2fs_ioc_resize_fs(filp, arg); + case FS_IOC_ENABLE_VERITY: + return f2fs_ioc_enable_verity(filp, arg); + case FS_IOC_MEASURE_VERITY: + return f2fs_ioc_measure_verity(filp, arg); default: return -ENOTTY; } @@ -3290,6 +3329,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: case F2FS_IOC_RESIZE_FS: + case FS_IOC_ENABLE_VERITY: + case FS_IOC_MEASURE_VERITY: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a33d7a849b2d..06da75d418e0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -46,9 +46,11 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; if (file_is_encrypt(inode)) new_fl |= S_ENCRYPTED; + if (file_is_verity(inode)) + new_fl |= S_VERITY; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| - S_ENCRYPTED); + S_ENCRYPTED|S_VERITY); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -733,6 +735,7 @@ no_delete: } out_clear: fscrypt_put_encryption_info(inode); + fsverity_cleanup_inode(inode); clear_inode(inode); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e15bd29bd453..f43befda0e1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3145,6 +3145,9 @@ try_onemore: sb->s_op = &f2fs_sops; #ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; +#endif +#ifdef CONFIG_FS_VERITY + sb->s_vop = &f2fs_verityops; #endif sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 3aeacd0aacfd..0cd64f994068 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -131,6 +131,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); + if (f2fs_sb_has_verity(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); @@ -364,6 +367,7 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, FEAT_LOST_FOUND, + FEAT_VERITY, FEAT_SB_CHECKSUM, }; @@ -381,6 +385,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_VERITY: case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } @@ -470,6 +475,9 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +#ifdef CONFIG_FS_VERITY +F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); +#endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) @@ -534,6 +542,9 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), +#ifdef CONFIG_FS_VERITY + ATTR_LIST(verity), +#endif ATTR_LIST(sb_checksum), NULL, }; diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c new file mode 100644 index 000000000000..a401ef72bc82 --- /dev/null +++ b/fs/f2fs/verity.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/f2fs/verity.c: fs-verity support for f2fs + * + * Copyright 2019 Google LLC + */ + +/* + * Implementation of fsverity_operations for f2fs. + * + * Like ext4, f2fs stores the verity metadata (Merkle tree and + * fsverity_descriptor) past the end of the file, starting at the first 64K + * boundary beyond i_size. This approach works because (a) verity files are + * readonly, and (b) pages fully beyond i_size aren't visible to userspace but + * can be read/written internally by f2fs with only some relatively small + * changes to f2fs. Extended attributes cannot be used because (a) f2fs limits + * the total size of an inode's xattr entries to 4096 bytes, which wouldn't be + * enough for even a single Merkle tree block, and (b) f2fs encryption doesn't + * encrypt xattrs, yet the verity metadata *must* be encrypted when the file is + * because it contains hashes of the plaintext data. + * + * Using a 64K boundary rather than a 4K one keeps things ready for + * architectures with 64K pages, and it doesn't necessarily waste space on-disk + * since there can be a hole between i_size and the start of the Merkle tree. + */ + +#include + +#include "f2fs.h" +#include "xattr.h" + +static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode) +{ + return round_up(inode->i_size, 65536); +} + +/* + * Read some verity metadata from the inode. __vfs_read() can't be used because + * we need to read beyond i_size. + */ +static int pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos) +{ + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *addr; + + page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + addr = kmap_atomic(page); + memcpy(buf, addr + offset_in_page(pos), n); + kunmap_atomic(addr); + + put_page(page); + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. + * kernel_write() can't be used because the file descriptor is readonly. + */ +static int pagecache_write(struct inode *inode, const void *buf, size_t count, + loff_t pos) +{ + if (pos + count > inode->i_sb->s_maxbytes) + return -EFBIG; + + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *fsdata; + void *addr; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, + &page, &fsdata); + if (res) + return res; + + addr = kmap_atomic(page); + memcpy(addr + offset_in_page(pos), buf, n); + kunmap_atomic(addr); + + res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, + page, fsdata); + if (res < 0) + return res; + if (res != n) + return -EIO; + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Format of f2fs verity xattr. This points to the location of the verity + * descriptor within the file data rather than containing it directly because + * the verity descriptor *must* be encrypted when f2fs encryption is used. But, + * f2fs encryption does not encrypt xattrs. + */ +struct fsverity_descriptor_location { + __le32 version; + __le32 size; + __le64 pos; +}; + +static int f2fs_begin_enable_verity(struct file *filp) +{ + struct inode *inode = file_inode(filp); + int err; + + if (f2fs_verity_in_progress(inode)) + return -EBUSY; + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + return -EOPNOTSUPP; + + /* + * Since the file was opened readonly, we have to initialize the quotas + * here and not rely on ->open() doing it. This must be done before + * evicting the inline data. + */ + err = dquot_initialize(inode); + if (err) + return err; + + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + + set_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return 0; +} + +static int f2fs_end_enable_verity(struct file *filp, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + struct inode *inode = file_inode(filp); + u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; + struct fsverity_descriptor_location dloc = { + .version = cpu_to_le32(1), + .size = cpu_to_le32(desc_size), + .pos = cpu_to_le64(desc_pos), + }; + int err = 0; + + if (desc != NULL) { + /* Succeeded; write the verity descriptor. */ + err = pagecache_write(inode, desc, desc_size, desc_pos); + + /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */ + if (!err) + err = filemap_write_and_wait(inode->i_mapping); + } + + /* If we failed, truncate anything we wrote past i_size. */ + if (desc == NULL || err) + f2fs_truncate(inode); + + clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + + if (desc != NULL && !err) { + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), + NULL, XATTR_CREATE); + if (!err) { + file_set_verity(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); + } + } + return err; +} + +static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, + size_t buf_size) +{ + struct fsverity_descriptor_location dloc; + int res; + u32 size; + u64 pos; + + /* Get the descriptor location */ + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL); + if (res < 0 && res != -ERANGE) + return res; + if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) { + f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format"); + return -EINVAL; + } + size = le32_to_cpu(dloc.size); + pos = le64_to_cpu(dloc.pos); + + /* Get the descriptor */ + if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { + f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); + return -EFSCORRUPTED; + } + if (buf_size) { + if (size > buf_size) + return -ERANGE; + res = pagecache_read(inode, buf, size, pos); + if (res) + return res; + } + return size; +} + +static struct page *f2fs_read_merkle_tree_page(struct inode *inode, + pgoff_t index) +{ + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; + + return read_mapping_page(inode->i_mapping, index, NULL); +} + +static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, + u64 index, int log_blocksize) +{ + loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize); + + return pagecache_write(inode, buf, 1 << log_blocksize, pos); +} + +const struct fsverity_operations f2fs_verityops = { + .begin_enable_verity = f2fs_begin_enable_verity, + .end_enable_verity = f2fs_end_enable_verity, + .get_verity_descriptor = f2fs_get_verity_descriptor, + .read_merkle_tree_page = f2fs_read_merkle_tree_page, + .write_merkle_tree_block = f2fs_write_merkle_tree_block, +}; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index a90920e2f949..de0c600b9cab 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -34,8 +34,10 @@ #define F2FS_XATTR_INDEX_ADVISE 7 /* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ #define F2FS_XATTR_INDEX_ENCRYPTION 9 +#define F2FS_XATTR_INDEX_VERITY 11 #define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" +#define F2FS_XATTR_NAME_VERITY "v" struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig new file mode 100644 index 000000000000..88fb25119899 --- /dev/null +++ b/fs/verity/Kconfig @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 + +config FS_VERITY + bool "FS Verity (read-only file-based authenticity protection)" + select CRYPTO + # SHA-256 is selected as it's intended to be the default hash algorithm. + # To avoid bloat, other wanted algorithms must be selected explicitly. + select CRYPTO_SHA256 + help + This option enables fs-verity. fs-verity is the dm-verity + mechanism implemented at the file level. On supported + filesystems (currently EXT4 and F2FS), userspace can use an + ioctl to enable verity for a file, which causes the filesystem + to build a Merkle tree for the file. The filesystem will then + transparently verify any data read from the file against the + Merkle tree. The file is also made read-only. + + This serves as an integrity check, but the availability of the + Merkle tree root hash also allows efficiently supporting + various use cases where normally the whole file would need to + be hashed at once, such as: (a) auditing (logging the file's + hash), or (b) authenticity verification (comparing the hash + against a known good value, e.g. from a digital signature). + + fs-verity is especially useful on large files where not all + the contents may actually be needed. Also, fs-verity verifies + data each time it is paged back in, which provides better + protection against malicious disks vs. an ahead-of-time hash. + + If unsure, say N. + +config FS_VERITY_DEBUG + bool "FS Verity debugging" + depends on FS_VERITY + help + Enable debugging messages related to fs-verity by default. + + Say N unless you are an fs-verity developer. + +config FS_VERITY_BUILTIN_SIGNATURES + bool "FS Verity builtin signature support" + depends on FS_VERITY + select SYSTEM_DATA_VERIFICATION + help + Support verifying signatures of verity files against the X.509 + certificates that have been loaded into the ".fs-verity" + kernel keyring. + + This is meant as a relatively simple mechanism that can be + used to provide an authenticity guarantee for verity files, as + an alternative to IMA appraisal. Userspace programs still + need to check that the verity bit is set in order to get an + authenticity guarantee. + + If unsure, say N. diff --git a/fs/verity/Makefile b/fs/verity/Makefile new file mode 100644 index 000000000000..570e9136334d --- /dev/null +++ b/fs/verity/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_FS_VERITY) += enable.o \ + hash_algs.o \ + init.o \ + measure.o \ + open.o \ + verify.o + +obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o diff --git a/fs/verity/enable.c b/fs/verity/enable.c new file mode 100644 index 000000000000..eabc6ac19906 --- /dev/null +++ b/fs/verity/enable.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/enable.c: ioctl to enable verity on a file + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include +#include +#include +#include +#include + +static int build_merkle_tree_level(struct inode *inode, unsigned int level, + u64 num_blocks_to_hash, + const struct merkle_tree_params *params, + u8 *pending_hashes, + struct ahash_request *req) +{ + const struct fsverity_operations *vops = inode->i_sb->s_vop; + unsigned int pending_size = 0; + u64 dst_block_num; + u64 i; + int err; + + if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */ + return -EINVAL; + + if (level < params->num_levels) { + dst_block_num = params->level_start[level]; + } else { + if (WARN_ON(num_blocks_to_hash != 1)) + return -EINVAL; + dst_block_num = 0; /* unused */ + } + + for (i = 0; i < num_blocks_to_hash; i++) { + struct page *src_page; + + if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash) + pr_debug("Hashing block %llu of %llu for level %u\n", + i + 1, num_blocks_to_hash, level); + + if (level == 0) { + /* Leaf: hashing a data block */ + src_page = read_mapping_page(inode->i_mapping, i, NULL); + if (IS_ERR(src_page)) { + err = PTR_ERR(src_page); + fsverity_err(inode, + "Error %d reading data page %llu", + err, i); + return err; + } + } else { + /* Non-leaf: hashing hash block from level below */ + src_page = vops->read_merkle_tree_page(inode, + params->level_start[level - 1] + i); + if (IS_ERR(src_page)) { + err = PTR_ERR(src_page); + fsverity_err(inode, + "Error %d reading Merkle tree page %llu", + err, params->level_start[level - 1] + i); + return err; + } + } + + err = fsverity_hash_page(params, inode, req, src_page, + &pending_hashes[pending_size]); + put_page(src_page); + if (err) + return err; + pending_size += params->digest_size; + + if (level == params->num_levels) /* Root hash? */ + return 0; + + if (pending_size + params->digest_size > params->block_size || + i + 1 == num_blocks_to_hash) { + /* Flush the pending hash block */ + memset(&pending_hashes[pending_size], 0, + params->block_size - pending_size); + err = vops->write_merkle_tree_block(inode, + pending_hashes, + dst_block_num, + params->log_blocksize); + if (err) { + fsverity_err(inode, + "Error %d writing Merkle tree block %llu", + err, dst_block_num); + return err; + } + dst_block_num++; + pending_size = 0; + } + + if (fatal_signal_pending(current)) + return -EINTR; + cond_resched(); + } + return 0; +} + +/* + * Build the Merkle tree for the given inode using the given parameters, and + * return the root hash in @root_hash. + * + * The tree is written to a filesystem-specific location as determined by the + * ->write_merkle_tree_block() method. However, the blocks that comprise the + * tree are the same for all filesystems. + */ +static int build_merkle_tree(struct inode *inode, + const struct merkle_tree_params *params, + u8 *root_hash) +{ + u8 *pending_hashes; + struct ahash_request *req; + u64 blocks; + unsigned int level; + int err = -ENOMEM; + + if (inode->i_size == 0) { + /* Empty file is a special case; root hash is all 0's */ + memset(root_hash, 0, params->digest_size); + return 0; + } + + pending_hashes = kmalloc(params->block_size, GFP_KERNEL); + req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL); + if (!pending_hashes || !req) + goto out; + + /* + * Build each level of the Merkle tree, starting at the leaf level + * (level 0) and ascending to the root node (level 'num_levels - 1'). + * Then at the end (level 'num_levels'), calculate the root hash. + */ + blocks = (inode->i_size + params->block_size - 1) >> + params->log_blocksize; + for (level = 0; level <= params->num_levels; level++) { + err = build_merkle_tree_level(inode, level, blocks, params, + pending_hashes, req); + if (err) + goto out; + blocks = (blocks + params->hashes_per_block - 1) >> + params->log_arity; + } + memcpy(root_hash, pending_hashes, params->digest_size); + err = 0; +out: + kfree(pending_hashes); + ahash_request_free(req); + return err; +} + +static int enable_verity(struct file *filp, + const struct fsverity_enable_arg *arg) +{ + struct inode *inode = file_inode(filp); + const struct fsverity_operations *vops = inode->i_sb->s_vop; + struct merkle_tree_params params = { }; + struct fsverity_descriptor *desc; + size_t desc_size = sizeof(*desc) + arg->sig_size; + struct fsverity_info *vi; + int err; + + /* Start initializing the fsverity_descriptor */ + desc = kzalloc(desc_size, GFP_KERNEL); + if (!desc) + return -ENOMEM; + desc->version = 1; + desc->hash_algorithm = arg->hash_algorithm; + desc->log_blocksize = ilog2(arg->block_size); + + /* Get the salt if the user provided one */ + if (arg->salt_size && + copy_from_user(desc->salt, + (const u8 __user *)(uintptr_t)arg->salt_ptr, + arg->salt_size)) { + err = -EFAULT; + goto out; + } + desc->salt_size = arg->salt_size; + + /* Get the signature if the user provided one */ + if (arg->sig_size && + copy_from_user(desc->signature, + (const u8 __user *)(uintptr_t)arg->sig_ptr, + arg->sig_size)) { + err = -EFAULT; + goto out; + } + desc->sig_size = cpu_to_le32(arg->sig_size); + + desc->data_size = cpu_to_le64(inode->i_size); + + /* Prepare the Merkle tree parameters */ + err = fsverity_init_merkle_tree_params(¶ms, inode, + arg->hash_algorithm, + desc->log_blocksize, + desc->salt, desc->salt_size); + if (err) + goto out; + + /* + * Start enabling verity on this file, serialized by the inode lock. + * Fail if verity is already enabled or is already being enabled. + */ + inode_lock(inode); + if (IS_VERITY(inode)) + err = -EEXIST; + else + err = vops->begin_enable_verity(filp); + inode_unlock(inode); + if (err) + goto out; + + /* + * Build the Merkle tree. Don't hold the inode lock during this, since + * on huge files this may take a very long time and we don't want to + * force unrelated syscalls like chown() to block forever. We don't + * need the inode lock here because deny_write_access() already prevents + * the file from being written to or truncated, and we still serialize + * ->begin_enable_verity() and ->end_enable_verity() using the inode + * lock and only allow one process to be here at a time on a given file. + */ + pr_debug("Building Merkle tree...\n"); + BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE); + err = build_merkle_tree(inode, ¶ms, desc->root_hash); + if (err) { + fsverity_err(inode, "Error %d building Merkle tree", err); + goto rollback; + } + pr_debug("Done building Merkle tree. Root hash is %s:%*phN\n", + params.hash_alg->name, params.digest_size, desc->root_hash); + + /* + * Create the fsverity_info. Don't bother trying to save work by + * reusing the merkle_tree_params from above. Instead, just create the + * fsverity_info from the fsverity_descriptor as if it were just loaded + * from disk. This is simpler, and it serves as an extra check that the + * metadata we're writing is valid before actually enabling verity. + */ + vi = fsverity_create_info(inode, desc, desc_size); + if (IS_ERR(vi)) { + err = PTR_ERR(vi); + goto rollback; + } + + if (arg->sig_size) + pr_debug("Storing a %u-byte PKCS#7 signature alongside the file\n", + arg->sig_size); + + /* + * Tell the filesystem to finish enabling verity on the file. + * Serialized with ->begin_enable_verity() by the inode lock. + */ + inode_lock(inode); + err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size); + inode_unlock(inode); + if (err) { + fsverity_err(inode, "%ps() failed with err %d", + vops->end_enable_verity, err); + fsverity_free_info(vi); + } else if (WARN_ON(!IS_VERITY(inode))) { + err = -EINVAL; + fsverity_free_info(vi); + } else { + /* Successfully enabled verity */ + + /* + * Readers can start using ->i_verity_info immediately, so it + * can't be rolled back once set. So don't set it until just + * after the filesystem has successfully enabled verity. + */ + fsverity_set_info(inode, vi); + } +out: + kfree(params.hashstate); + kfree(desc); + return err; + +rollback: + inode_lock(inode); + (void)vops->end_enable_verity(filp, NULL, 0, params.tree_size); + inode_unlock(inode); + goto out; +} + +/** + * fsverity_ioctl_enable() - enable verity on a file + * + * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of + * Documentation/filesystems/fsverity.rst for the documentation. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) +{ + struct inode *inode = file_inode(filp); + struct fsverity_enable_arg arg; + int err; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (arg.version != 1) + return -EINVAL; + + if (arg.__reserved1 || + memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2))) + return -EINVAL; + + if (arg.block_size != PAGE_SIZE) + return -EINVAL; + + if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt)) + return -EMSGSIZE; + + if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE) + return -EMSGSIZE; + + /* + * Require a regular file with write access. But the actual fd must + * still be readonly so that we can lock out all writers. This is + * needed to guarantee that no writable fds exist to the file once it + * has verity enabled, and to stabilize the data being hashed. + */ + + err = inode_permission(inode, MAY_WRITE); + if (err) + return err; + + if (IS_APPEND(inode)) + return -EPERM; + + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) /* -EROFS */ + return err; + + err = deny_write_access(filp); + if (err) /* -ETXTBSY */ + goto out_drop_write; + + err = enable_verity(filp, &arg); + if (err) + goto out_allow_write_access; + + /* + * Some pages of the file may have been evicted from pagecache after + * being used in the Merkle tree construction, then read into pagecache + * again by another process reading from the file concurrently. Since + * these pages didn't undergo verification against the file measurement + * which fs-verity now claims to be enforcing, we have to wipe the + * pagecache to ensure that all future reads are verified. + */ + filemap_write_and_wait(inode->i_mapping); + invalidate_inode_pages2(inode->i_mapping); + + /* + * allow_write_access() is needed to pair with deny_write_access(). + * Regardless, the filesystem won't allow writing to verity files. + */ +out_allow_write_access: + allow_write_access(filp); +out_drop_write: + mnt_drop_write_file(filp); + return err; +} +EXPORT_SYMBOL_GPL(fsverity_ioctl_enable); diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h new file mode 100644 index 000000000000..e74c79b64d88 --- /dev/null +++ b/fs/verity/fsverity_private.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs-verity: read-only file-based authenticity protection + * + * Copyright 2019 Google LLC + */ + +#ifndef _FSVERITY_PRIVATE_H +#define _FSVERITY_PRIVATE_H + +#ifdef CONFIG_FS_VERITY_DEBUG +#define DEBUG +#endif + +#define pr_fmt(fmt) "fs-verity: " fmt + +#include +#include + +struct ahash_request; + +/* + * Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty; + * it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks. + */ +#define FS_VERITY_MAX_LEVELS 8 + +/* + * Largest digest size among all hash algorithms supported by fs-verity. + * Currently assumed to be <= size of fsverity_descriptor::root_hash. + */ +#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE + +/* A hash algorithm supported by fs-verity */ +struct fsverity_hash_alg { + struct crypto_ahash *tfm; /* hash tfm, allocated on demand */ + const char *name; /* crypto API name, e.g. sha256 */ + unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ + unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */ +}; + +/* Merkle tree parameters: hash algorithm, initial hash state, and topology */ +struct merkle_tree_params { + const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + const u8 *hashstate; /* initial hash state or NULL */ + unsigned int digest_size; /* same as hash_alg->digest_size */ + unsigned int block_size; /* size of data and tree blocks */ + unsigned int hashes_per_block; /* number of hashes per tree block */ + unsigned int log_blocksize; /* log2(block_size) */ + unsigned int log_arity; /* log2(hashes_per_block) */ + unsigned int num_levels; /* number of levels in Merkle tree */ + u64 tree_size; /* Merkle tree size in bytes */ + + /* + * Starting block index for each tree level, ordered from leaf level (0) + * to root level ('num_levels - 1') + */ + u64 level_start[FS_VERITY_MAX_LEVELS]; +}; + +/** + * fsverity_info - cached verity metadata for an inode + * + * When a verity file is first opened, an instance of this struct is allocated + * and stored in ->i_verity_info; it remains until the inode is evicted. It + * caches information about the Merkle tree that's needed to efficiently verify + * data read from the file. It also caches the file measurement. The Merkle + * tree pages themselves are not cached here, but the filesystem may cache them. + */ +struct fsverity_info { + struct merkle_tree_params tree_params; + u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE]; + u8 measurement[FS_VERITY_MAX_DIGEST_SIZE]; + const struct inode *inode; +}; + +/* + * Merkle tree properties. The file measurement is the hash of this structure + * excluding the signature and with the sig_size field set to 0. + */ +struct fsverity_descriptor { + __u8 version; /* must be 1 */ + __u8 hash_algorithm; /* Merkle tree hash algorithm */ + __u8 log_blocksize; /* log2 of size of data and tree blocks */ + __u8 salt_size; /* size of salt in bytes; 0 if none */ + __le32 sig_size; /* size of signature in bytes; 0 if none */ + __le64 data_size; /* size of file the Merkle tree is built over */ + __u8 root_hash[64]; /* Merkle tree root hash */ + __u8 salt[32]; /* salt prepended to each hashed block */ + __u8 __reserved[144]; /* must be 0's */ + __u8 signature[]; /* optional PKCS#7 signature */ +}; + +/* Arbitrary limit to bound the kmalloc() size. Can be changed. */ +#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 + +#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \ + sizeof(struct fsverity_descriptor)) + +/* + * Format in which verity file measurements are signed. This is the same as + * 'struct fsverity_digest', except here some magic bytes are prepended to + * provide some context about what is being signed in case the same key is used + * for non-fsverity purposes, and here the fields have fixed endianness. + */ +struct fsverity_signed_digest { + char magic[8]; /* must be "FSVerity" */ + __le16 digest_algorithm; + __le16 digest_size; + __u8 digest[]; +}; + +/* hash_algs.c */ + +extern struct fsverity_hash_alg fsverity_hash_algs[]; + +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, + const u8 *salt, size_t salt_size); +int fsverity_hash_page(const struct merkle_tree_params *params, + const struct inode *inode, + struct ahash_request *req, struct page *page, u8 *out); +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, + const void *data, size_t size, u8 *out); +void __init fsverity_check_hash_algs(void); + +/* init.c */ + +extern void __printf(3, 4) __cold +fsverity_msg(const struct inode *inode, const char *level, + const char *fmt, ...); + +#define fsverity_warn(inode, fmt, ...) \ + fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__) +#define fsverity_err(inode, fmt, ...) \ + fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__) + +/* open.c */ + +int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, + const struct inode *inode, + unsigned int hash_algorithm, + unsigned int log_blocksize, + const u8 *salt, size_t salt_size); + +struct fsverity_info *fsverity_create_info(const struct inode *inode, + void *desc, size_t desc_size); + +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi); + +void fsverity_free_info(struct fsverity_info *vi); + +int __init fsverity_init_info_cache(void); +void __init fsverity_exit_info_cache(void); + +/* signature.c */ + +#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES +int fsverity_verify_signature(const struct fsverity_info *vi, + const struct fsverity_descriptor *desc, + size_t desc_size); + +int __init fsverity_init_signature(void); +#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */ +static inline int +fsverity_verify_signature(const struct fsverity_info *vi, + const struct fsverity_descriptor *desc, + size_t desc_size) +{ + return 0; +} + +static inline int fsverity_init_signature(void) +{ + return 0; +} +#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */ + +/* verify.c */ + +int __init fsverity_init_workqueue(void); +void __init fsverity_exit_workqueue(void); + +#endif /* _FSVERITY_PRIVATE_H */ diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c new file mode 100644 index 000000000000..31e6d7d2389a --- /dev/null +++ b/fs/verity/hash_algs.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/hash_algs.c: fs-verity hash algorithms + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include +#include + +/* The hash algorithms supported by fs-verity */ +struct fsverity_hash_alg fsverity_hash_algs[] = { + [FS_VERITY_HASH_ALG_SHA256] = { + .name = "sha256", + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE, + }, + [FS_VERITY_HASH_ALG_SHA512] = { + .name = "sha512", + .digest_size = SHA512_DIGEST_SIZE, + .block_size = SHA512_BLOCK_SIZE, + }, +}; + +/** + * fsverity_get_hash_alg() - validate and prepare a hash algorithm + * @inode: optional inode for logging purposes + * @num: the hash algorithm number + * + * Get the struct fsverity_hash_alg for the given hash algorithm number, and + * ensure it has a hash transform ready to go. The hash transforms are + * allocated on-demand so that we don't waste resources unnecessarily, and + * because the crypto modules may be initialized later than fs/verity/. + * + * Return: pointer to the hash alg on success, else an ERR_PTR() + */ +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) +{ + struct fsverity_hash_alg *alg; + struct crypto_ahash *tfm; + int err; + + if (num >= ARRAY_SIZE(fsverity_hash_algs) || + !fsverity_hash_algs[num].name) { + fsverity_warn(inode, "Unknown hash algorithm number: %u", num); + return ERR_PTR(-EINVAL); + } + alg = &fsverity_hash_algs[num]; + + /* pairs with cmpxchg() below */ + tfm = READ_ONCE(alg->tfm); + if (likely(tfm != NULL)) + return alg; + /* + * Using the shash API would make things a bit simpler, but the ahash + * API is preferable as it allows the use of crypto accelerators. + */ + tfm = crypto_alloc_ahash(alg->name, 0, 0); + if (IS_ERR(tfm)) { + if (PTR_ERR(tfm) == -ENOENT) { + fsverity_warn(inode, + "Missing crypto API support for hash algorithm \"%s\"", + alg->name); + return ERR_PTR(-ENOPKG); + } + fsverity_err(inode, + "Error allocating hash algorithm \"%s\": %ld", + alg->name, PTR_ERR(tfm)); + return ERR_CAST(tfm); + } + + err = -EINVAL; + if (WARN_ON(alg->digest_size != crypto_ahash_digestsize(tfm))) + goto err_free_tfm; + if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm))) + goto err_free_tfm; + + pr_info("%s using implementation \"%s\"\n", + alg->name, crypto_ahash_driver_name(tfm)); + + /* pairs with READ_ONCE() above */ + if (cmpxchg(&alg->tfm, NULL, tfm) != NULL) + crypto_free_ahash(tfm); + + return alg; + +err_free_tfm: + crypto_free_ahash(tfm); + return ERR_PTR(err); +} + +/** + * fsverity_prepare_hash_state() - precompute the initial hash state + * @alg: hash algorithm + * @salt: a salt which is to be prepended to all data to be hashed + * @salt_size: salt size in bytes, possibly 0 + * + * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed + * initial hash state on success or an ERR_PTR() on failure. + */ +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, + const u8 *salt, size_t salt_size) +{ + u8 *hashstate = NULL; + struct ahash_request *req = NULL; + u8 *padded_salt = NULL; + size_t padded_salt_size; + struct scatterlist sg; + DECLARE_CRYPTO_WAIT(wait); + int err; + + if (salt_size == 0) + return NULL; + + hashstate = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL); + if (!hashstate) + return ERR_PTR(-ENOMEM); + + req = ahash_request_alloc(alg->tfm, GFP_KERNEL); + if (!req) { + err = -ENOMEM; + goto err_free; + } + + /* + * Zero-pad the salt to the next multiple of the input size of the hash + * algorithm's compression function, e.g. 64 bytes for SHA-256 or 128 + * bytes for SHA-512. This ensures that the hash algorithm won't have + * any bytes buffered internally after processing the salt, thus making + * salted hashing just as fast as unsalted hashing. + */ + padded_salt_size = round_up(salt_size, alg->block_size); + padded_salt = kzalloc(padded_salt_size, GFP_KERNEL); + if (!padded_salt) { + err = -ENOMEM; + goto err_free; + } + memcpy(padded_salt, salt, salt_size); + + sg_init_one(&sg, padded_salt, padded_salt_size); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ahash_request_set_crypt(req, &sg, NULL, padded_salt_size); + + err = crypto_wait_req(crypto_ahash_init(req), &wait); + if (err) + goto err_free; + + err = crypto_wait_req(crypto_ahash_update(req), &wait); + if (err) + goto err_free; + + err = crypto_ahash_export(req, hashstate); + if (err) + goto err_free; +out: + ahash_request_free(req); + kfree(padded_salt); + return hashstate; + +err_free: + kfree(hashstate); + hashstate = ERR_PTR(err); + goto out; +} + +/** + * fsverity_hash_page() - hash a single data or hash page + * @params: the Merkle tree's parameters + * @inode: inode for which the hashing is being done + * @req: preallocated hash request + * @page: the page to hash + * @out: output digest, size 'params->digest_size' bytes + * + * Hash a single data or hash block, assuming block_size == PAGE_SIZE. + * The hash is salted if a salt is specified in the Merkle tree parameters. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_hash_page(const struct merkle_tree_params *params, + const struct inode *inode, + struct ahash_request *req, struct page *page, u8 *out) +{ + struct scatterlist sg; + DECLARE_CRYPTO_WAIT(wait); + int err; + + if (WARN_ON(params->block_size != PAGE_SIZE)) + return -EINVAL; + + sg_init_table(&sg, 1); + sg_set_page(&sg, page, PAGE_SIZE, 0); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ahash_request_set_crypt(req, &sg, out, PAGE_SIZE); + + if (params->hashstate) { + err = crypto_ahash_import(req, params->hashstate); + if (err) { + fsverity_err(inode, + "Error %d importing hash state", err); + return err; + } + err = crypto_ahash_finup(req); + } else { + err = crypto_ahash_digest(req); + } + + err = crypto_wait_req(err, &wait); + if (err) + fsverity_err(inode, "Error %d computing page hash", err); + return err; +} + +/** + * fsverity_hash_buffer() - hash some data + * @alg: the hash algorithm to use + * @data: the data to hash + * @size: size of data to hash, in bytes + * @out: output digest, size 'alg->digest_size' bytes + * + * Hash some data which is located in physically contiguous memory (i.e. memory + * allocated by kmalloc(), not by vmalloc()). No salt is used. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, + const void *data, size_t size, u8 *out) +{ + struct ahash_request *req; + struct scatterlist sg; + DECLARE_CRYPTO_WAIT(wait); + int err; + + req = ahash_request_alloc(alg->tfm, GFP_KERNEL); + if (!req) + return -ENOMEM; + + sg_init_one(&sg, data, size); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ahash_request_set_crypt(req, &sg, out, size); + + err = crypto_wait_req(crypto_ahash_digest(req), &wait); + + ahash_request_free(req); + return err; +} + +void __init fsverity_check_hash_algs(void) +{ + size_t i; + + /* + * Sanity check the hash algorithms (could be a build-time check, but + * they're in an array) + */ + for (i = 0; i < ARRAY_SIZE(fsverity_hash_algs); i++) { + const struct fsverity_hash_alg *alg = &fsverity_hash_algs[i]; + + if (!alg->name) + continue; + + BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE); + + /* + * For efficiency, the implementation currently assumes the + * digest and block sizes are powers of 2. This limitation can + * be lifted if the code is updated to handle other values. + */ + BUG_ON(!is_power_of_2(alg->digest_size)); + BUG_ON(!is_power_of_2(alg->block_size)); + } +} diff --git a/fs/verity/init.c b/fs/verity/init.c new file mode 100644 index 000000000000..94c104e00861 --- /dev/null +++ b/fs/verity/init.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/init.c: fs-verity module initialization and logging + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include + +void fsverity_msg(const struct inode *inode, const char *level, + const char *fmt, ...) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + struct va_format vaf; + va_list args; + + if (!__ratelimit(&rs)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (inode) + printk("%sfs-verity (%s, inode %lu): %pV\n", + level, inode->i_sb->s_id, inode->i_ino, &vaf); + else + printk("%sfs-verity: %pV\n", level, &vaf); + va_end(args); +} + +static int __init fsverity_init(void) +{ + int err; + + fsverity_check_hash_algs(); + + err = fsverity_init_info_cache(); + if (err) + return err; + + err = fsverity_init_workqueue(); + if (err) + goto err_exit_info_cache; + + err = fsverity_init_signature(); + if (err) + goto err_exit_workqueue; + + pr_debug("Initialized fs-verity\n"); + return 0; + +err_exit_workqueue: + fsverity_exit_workqueue(); +err_exit_info_cache: + fsverity_exit_info_cache(); + return err; +} +late_initcall(fsverity_init) diff --git a/fs/verity/measure.c b/fs/verity/measure.c new file mode 100644 index 000000000000..05049b68c745 --- /dev/null +++ b/fs/verity/measure.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/measure.c: ioctl to get a verity file's measurement + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include + +/** + * fsverity_ioctl_measure() - get a verity file's measurement + * + * Retrieve the file measurement that the kernel is enforcing for reads from a + * verity file. See the "FS_IOC_MEASURE_VERITY" section of + * Documentation/filesystems/fsverity.rst for the documentation. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_ioctl_measure(struct file *filp, void __user *_uarg) +{ + const struct inode *inode = file_inode(filp); + struct fsverity_digest __user *uarg = _uarg; + const struct fsverity_info *vi; + const struct fsverity_hash_alg *hash_alg; + struct fsverity_digest arg; + + vi = fsverity_get_info(inode); + if (!vi) + return -ENODATA; /* not a verity file */ + hash_alg = vi->tree_params.hash_alg; + + /* + * The user specifies the digest_size their buffer has space for; we can + * return the digest if it fits in the available space. We write back + * the actual size, which may be shorter than the user-specified size. + */ + + if (get_user(arg.digest_size, &uarg->digest_size)) + return -EFAULT; + if (arg.digest_size < hash_alg->digest_size) + return -EOVERFLOW; + + memset(&arg, 0, sizeof(arg)); + arg.digest_algorithm = hash_alg - fsverity_hash_algs; + arg.digest_size = hash_alg->digest_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + if (copy_to_user(uarg->digest, vi->measurement, hash_alg->digest_size)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(fsverity_ioctl_measure); diff --git a/fs/verity/open.c b/fs/verity/open.c new file mode 100644 index 000000000000..63d1004b688c --- /dev/null +++ b/fs/verity/open.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/open.c: opening fs-verity files + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include + +static struct kmem_cache *fsverity_info_cachep; + +/** + * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters + * @params: the parameters struct to initialize + * @inode: the inode for which the Merkle tree is being built + * @hash_algorithm: number of hash algorithm to use + * @log_blocksize: log base 2 of block size to use + * @salt: pointer to salt (optional) + * @salt_size: size of salt, possibly 0 + * + * Validate the hash algorithm and block size, then compute the tree topology + * (num levels, num blocks in each level, etc.) and initialize @params. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, + const struct inode *inode, + unsigned int hash_algorithm, + unsigned int log_blocksize, + const u8 *salt, size_t salt_size) +{ + const struct fsverity_hash_alg *hash_alg; + int err; + u64 blocks; + u64 offset; + int level; + + memset(params, 0, sizeof(*params)); + + hash_alg = fsverity_get_hash_alg(inode, hash_algorithm); + if (IS_ERR(hash_alg)) + return PTR_ERR(hash_alg); + params->hash_alg = hash_alg; + params->digest_size = hash_alg->digest_size; + + params->hashstate = fsverity_prepare_hash_state(hash_alg, salt, + salt_size); + if (IS_ERR(params->hashstate)) { + err = PTR_ERR(params->hashstate); + params->hashstate = NULL; + fsverity_err(inode, "Error %d preparing hash state", err); + goto out_err; + } + + if (log_blocksize != PAGE_SHIFT) { + fsverity_warn(inode, "Unsupported log_blocksize: %u", + log_blocksize); + err = -EINVAL; + goto out_err; + } + params->log_blocksize = log_blocksize; + params->block_size = 1 << log_blocksize; + + if (WARN_ON(!is_power_of_2(params->digest_size))) { + err = -EINVAL; + goto out_err; + } + if (params->block_size < 2 * params->digest_size) { + fsverity_warn(inode, + "Merkle tree block size (%u) too small for hash algorithm \"%s\"", + params->block_size, hash_alg->name); + err = -EINVAL; + goto out_err; + } + params->log_arity = params->log_blocksize - ilog2(params->digest_size); + params->hashes_per_block = 1 << params->log_arity; + + pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n", + hash_alg->name, params->block_size, params->hashes_per_block, + (int)salt_size, salt); + + /* + * Compute the number of levels in the Merkle tree and create a map from + * level to the starting block of that level. Level 'num_levels - 1' is + * the root and is stored first. Level 0 is the level directly "above" + * the data blocks and is stored last. + */ + + /* Compute number of levels and the number of blocks in each level */ + blocks = (inode->i_size + params->block_size - 1) >> log_blocksize; + pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks); + while (blocks > 1) { + if (params->num_levels >= FS_VERITY_MAX_LEVELS) { + fsverity_err(inode, "Too many levels in Merkle tree"); + err = -EINVAL; + goto out_err; + } + blocks = (blocks + params->hashes_per_block - 1) >> + params->log_arity; + /* temporarily using level_start[] to store blocks in level */ + params->level_start[params->num_levels++] = blocks; + } + + /* Compute the starting block of each level */ + offset = 0; + for (level = (int)params->num_levels - 1; level >= 0; level--) { + blocks = params->level_start[level]; + params->level_start[level] = offset; + pr_debug("Level %d is %llu blocks starting at index %llu\n", + level, blocks, offset); + offset += blocks; + } + + params->tree_size = offset << log_blocksize; + return 0; + +out_err: + kfree(params->hashstate); + memset(params, 0, sizeof(*params)); + return err; +} + +/* + * Compute the file measurement by hashing the fsverity_descriptor excluding the + * signature and with the sig_size field set to 0. + */ +static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg, + struct fsverity_descriptor *desc, + u8 *measurement) +{ + __le32 sig_size = desc->sig_size; + int err; + + desc->sig_size = 0; + err = fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement); + desc->sig_size = sig_size; + + return err; +} + +/* + * Validate the given fsverity_descriptor and create a new fsverity_info from + * it. The signature (if present) is also checked. + */ +struct fsverity_info *fsverity_create_info(const struct inode *inode, + void *_desc, size_t desc_size) +{ + struct fsverity_descriptor *desc = _desc; + struct fsverity_info *vi; + int err; + + if (desc_size < sizeof(*desc)) { + fsverity_err(inode, "Unrecognized descriptor size: %zu bytes", + desc_size); + return ERR_PTR(-EINVAL); + } + + if (desc->version != 1) { + fsverity_err(inode, "Unrecognized descriptor version: %u", + desc->version); + return ERR_PTR(-EINVAL); + } + + if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) { + fsverity_err(inode, "Reserved bits set in descriptor"); + return ERR_PTR(-EINVAL); + } + + if (desc->salt_size > sizeof(desc->salt)) { + fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size); + return ERR_PTR(-EINVAL); + } + + if (le64_to_cpu(desc->data_size) != inode->i_size) { + fsverity_err(inode, + "Wrong data_size: %llu (desc) != %lld (inode)", + le64_to_cpu(desc->data_size), inode->i_size); + return ERR_PTR(-EINVAL); + } + + vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL); + if (!vi) + return ERR_PTR(-ENOMEM); + vi->inode = inode; + + err = fsverity_init_merkle_tree_params(&vi->tree_params, inode, + desc->hash_algorithm, + desc->log_blocksize, + desc->salt, desc->salt_size); + if (err) { + fsverity_err(inode, + "Error %d initializing Merkle tree parameters", + err); + goto out; + } + + memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size); + + err = compute_file_measurement(vi->tree_params.hash_alg, desc, + vi->measurement); + if (err) { + fsverity_err(inode, "Error %d computing file measurement", err); + goto out; + } + pr_debug("Computed file measurement: %s:%*phN\n", + vi->tree_params.hash_alg->name, + vi->tree_params.digest_size, vi->measurement); + + err = fsverity_verify_signature(vi, desc, desc_size); +out: + if (err) { + fsverity_free_info(vi); + vi = ERR_PTR(err); + } + return vi; +} + +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi) +{ + /* + * Multiple processes may race to set ->i_verity_info, so use cmpxchg. + * This pairs with the READ_ONCE() in fsverity_get_info(). + */ + if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL) + fsverity_free_info(vi); +} + +void fsverity_free_info(struct fsverity_info *vi) +{ + if (!vi) + return; + kfree(vi->tree_params.hashstate); + kmem_cache_free(fsverity_info_cachep, vi); +} + +/* Ensure the inode has an ->i_verity_info */ +static int ensure_verity_info(struct inode *inode) +{ + struct fsverity_info *vi = fsverity_get_info(inode); + struct fsverity_descriptor *desc; + int res; + + if (vi) + return 0; + + res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0); + if (res < 0) { + fsverity_err(inode, + "Error %d getting verity descriptor size", res); + return res; + } + if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) { + fsverity_err(inode, "Verity descriptor is too large (%d bytes)", + res); + return -EMSGSIZE; + } + desc = kmalloc(res, GFP_KERNEL); + if (!desc) + return -ENOMEM; + res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res); + if (res < 0) { + fsverity_err(inode, "Error %d reading verity descriptor", res); + goto out_free_desc; + } + + vi = fsverity_create_info(inode, desc, res); + if (IS_ERR(vi)) { + res = PTR_ERR(vi); + goto out_free_desc; + } + + fsverity_set_info(inode, vi); + res = 0; +out_free_desc: + kfree(desc); + return res; +} + +/** + * fsverity_file_open() - prepare to open a verity file + * @inode: the inode being opened + * @filp: the struct file being set up + * + * When opening a verity file, deny the open if it is for writing. Otherwise, + * set up the inode's ->i_verity_info if not already done. + * + * When combined with fscrypt, this must be called after fscrypt_file_open(). + * Otherwise, we won't have the key set up to decrypt the verity metadata. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_file_open(struct inode *inode, struct file *filp) +{ + if (!IS_VERITY(inode)) + return 0; + + if (filp->f_mode & FMODE_WRITE) { + pr_debug("Denying opening verity file (ino %lu) for write\n", + inode->i_ino); + return -EPERM; + } + + return ensure_verity_info(inode); +} +EXPORT_SYMBOL_GPL(fsverity_file_open); + +/** + * fsverity_prepare_setattr() - prepare to change a verity inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Verity files are immutable, so deny truncates. This isn't covered by the + * open-time check because sys_truncate() takes a path, not a file descriptor. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) { + pr_debug("Denying truncate of verity file (ino %lu)\n", + d_inode(dentry)->i_ino); + return -EPERM; + } + return 0; +} +EXPORT_SYMBOL_GPL(fsverity_prepare_setattr); + +/** + * fsverity_cleanup_inode() - free the inode's verity info, if present + * + * Filesystems must call this on inode eviction to free ->i_verity_info. + */ +void fsverity_cleanup_inode(struct inode *inode) +{ + fsverity_free_info(inode->i_verity_info); + inode->i_verity_info = NULL; +} +EXPORT_SYMBOL_GPL(fsverity_cleanup_inode); + +int __init fsverity_init_info_cache(void) +{ + fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info, + SLAB_RECLAIM_ACCOUNT, + measurement); + if (!fsverity_info_cachep) + return -ENOMEM; + return 0; +} + +void __init fsverity_exit_info_cache(void) +{ + kmem_cache_destroy(fsverity_info_cachep); + fsverity_info_cachep = NULL; +} diff --git a/fs/verity/signature.c b/fs/verity/signature.c new file mode 100644 index 000000000000..c8b255232de5 --- /dev/null +++ b/fs/verity/signature.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/signature.c: verification of builtin signatures + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include +#include +#include +#include + +/* + * /proc/sys/fs/verity/require_signatures + * If 1, all verity files must have a valid builtin signature. + */ +static int fsverity_require_signatures; + +/* + * Keyring that contains the trusted X.509 certificates. + * + * Only root (kuid=0) can modify this. Also, root may use + * keyctl_restrict_keyring() to prevent any more additions. + */ +static struct key *fsverity_keyring; + +/** + * fsverity_verify_signature() - check a verity file's signature + * + * If the file's fs-verity descriptor includes a signature of the file + * measurement, verify it against the certificates in the fs-verity keyring. + * + * Return: 0 on success (signature valid or not required); -errno on failure + */ +int fsverity_verify_signature(const struct fsverity_info *vi, + const struct fsverity_descriptor *desc, + size_t desc_size) +{ + const struct inode *inode = vi->inode; + const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg; + const u32 sig_size = le32_to_cpu(desc->sig_size); + struct fsverity_signed_digest *d; + int err; + + if (sig_size == 0) { + if (fsverity_require_signatures) { + fsverity_err(inode, + "require_signatures=1, rejecting unsigned file!"); + return -EPERM; + } + return 0; + } + + if (sig_size > desc_size - sizeof(*desc)) { + fsverity_err(inode, "Signature overflows verity descriptor"); + return -EBADMSG; + } + + d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL); + if (!d) + return -ENOMEM; + memcpy(d->magic, "FSVerity", 8); + d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs); + d->digest_size = cpu_to_le16(hash_alg->digest_size); + memcpy(d->digest, vi->measurement, hash_alg->digest_size); + + err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size, + desc->signature, sig_size, + fsverity_keyring, + VERIFYING_UNSPECIFIED_SIGNATURE, + NULL, NULL); + kfree(d); + + if (err) { + if (err == -ENOKEY) + fsverity_err(inode, + "File's signing cert isn't in the fs-verity keyring"); + else if (err == -EKEYREJECTED) + fsverity_err(inode, "Incorrect file signature"); + else if (err == -EBADMSG) + fsverity_err(inode, "Malformed file signature"); + else + fsverity_err(inode, "Error %d verifying file signature", + err); + return err; + } + + pr_debug("Valid signature for file measurement %s:%*phN\n", + hash_alg->name, hash_alg->digest_size, vi->measurement); + return 0; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *fsverity_sysctl_header; + +static const struct ctl_path fsverity_sysctl_path[] = { + { .procname = "fs", }, + { .procname = "verity", }, + { } +}; + +static struct ctl_table fsverity_sysctl_table[] = { + { + .procname = "require_signatures", + .data = &fsverity_require_signatures, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static int __init fsverity_sysctl_init(void) +{ + fsverity_sysctl_header = register_sysctl_paths(fsverity_sysctl_path, + fsverity_sysctl_table); + if (!fsverity_sysctl_header) { + pr_err("sysctl registration failed!\n"); + return -ENOMEM; + } + return 0; +} +#else /* !CONFIG_SYSCTL */ +static inline int __init fsverity_sysctl_init(void) +{ + return 0; +} +#endif /* !CONFIG_SYSCTL */ + +int __init fsverity_init_signature(void) +{ + struct key *ring; + int err; + + ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0), + current_cred(), KEY_POS_SEARCH | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | + KEY_USR_SEARCH | KEY_USR_SETATTR, + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + if (IS_ERR(ring)) + return PTR_ERR(ring); + + err = fsverity_sysctl_init(); + if (err) + goto err_put_ring; + + fsverity_keyring = ring; + return 0; + +err_put_ring: + key_put(ring); + return err; +} diff --git a/fs/verity/verify.c b/fs/verity/verify.c new file mode 100644 index 000000000000..3e8f2de44667 --- /dev/null +++ b/fs/verity/verify.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/verify.c: data verification functions, i.e. hooks for ->readpages() + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include +#include +#include + +static struct workqueue_struct *fsverity_read_workqueue; + +/** + * hash_at_level() - compute the location of the block's hash at the given level + * + * @params: (in) the Merkle tree parameters + * @dindex: (in) the index of the data block being verified + * @level: (in) the level of hash we want (0 is leaf level) + * @hindex: (out) the index of the hash block containing the wanted hash + * @hoffset: (out) the byte offset to the wanted hash within the hash block + */ +static void hash_at_level(const struct merkle_tree_params *params, + pgoff_t dindex, unsigned int level, pgoff_t *hindex, + unsigned int *hoffset) +{ + pgoff_t position; + + /* Offset of the hash within the level's region, in hashes */ + position = dindex >> (level * params->log_arity); + + /* Index of the hash block in the tree overall */ + *hindex = params->level_start[level] + (position >> params->log_arity); + + /* Offset of the wanted hash (in bytes) within the hash block */ + *hoffset = (position & ((1 << params->log_arity) - 1)) << + (params->log_blocksize - params->log_arity); +} + +/* Extract a hash from a hash page */ +static void extract_hash(struct page *hpage, unsigned int hoffset, + unsigned int hsize, u8 *out) +{ + void *virt = kmap_atomic(hpage); + + memcpy(out, virt + hoffset, hsize); + kunmap_atomic(virt); +} + +static inline int cmp_hashes(const struct fsverity_info *vi, + const u8 *want_hash, const u8 *real_hash, + pgoff_t index, int level) +{ + const unsigned int hsize = vi->tree_params.digest_size; + + if (memcmp(want_hash, real_hash, hsize) == 0) + return 0; + + fsverity_err(vi->inode, + "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN", + index, level, + vi->tree_params.hash_alg->name, hsize, want_hash, + vi->tree_params.hash_alg->name, hsize, real_hash); + return -EBADMSG; +} + +/* + * Verify a single data page against the file's Merkle tree. + * + * In principle, we need to verify the entire path to the root node. However, + * for efficiency the filesystem may cache the hash pages. Therefore we need + * only ascend the tree until an already-verified page is seen, as indicated by + * the PageChecked bit being set; then verify the path to that page. + * + * This code currently only supports the case where the verity block size is + * equal to PAGE_SIZE. Doing otherwise would be possible but tricky, since we + * wouldn't be able to use the PageChecked bit. + * + * Note that multiple processes may race to verify a hash page and mark it + * Checked, but it doesn't matter; the result will be the same either way. + * + * Return: true if the page is valid, else false. + */ +static bool verify_page(struct inode *inode, const struct fsverity_info *vi, + struct ahash_request *req, struct page *data_page) +{ + const struct merkle_tree_params *params = &vi->tree_params; + const unsigned int hsize = params->digest_size; + const pgoff_t index = data_page->index; + int level; + u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE]; + const u8 *want_hash; + u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE]; + struct page *hpages[FS_VERITY_MAX_LEVELS]; + unsigned int hoffsets[FS_VERITY_MAX_LEVELS]; + int err; + + if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page))) + return false; + + pr_debug_ratelimited("Verifying data page %lu...\n", index); + + /* + * Starting at the leaf level, ascend the tree saving hash pages along + * the way until we find a verified hash page, indicated by PageChecked; + * or until we reach the root. + */ + for (level = 0; level < params->num_levels; level++) { + pgoff_t hindex; + unsigned int hoffset; + struct page *hpage; + + hash_at_level(params, index, level, &hindex, &hoffset); + + pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", + level, hindex, hoffset); + + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, + hindex); + if (IS_ERR(hpage)) { + err = PTR_ERR(hpage); + fsverity_err(inode, + "Error %d reading Merkle tree page %lu", + err, hindex); + goto out; + } + + if (PageChecked(hpage)) { + extract_hash(hpage, hoffset, hsize, _want_hash); + want_hash = _want_hash; + put_page(hpage); + pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n", + params->hash_alg->name, + hsize, want_hash); + goto descend; + } + pr_debug_ratelimited("Hash page not yet checked\n"); + hpages[level] = hpage; + hoffsets[level] = hoffset; + } + + want_hash = vi->root_hash; + pr_debug("Want root hash: %s:%*phN\n", + params->hash_alg->name, hsize, want_hash); +descend: + /* Descend the tree verifying hash pages */ + for (; level > 0; level--) { + struct page *hpage = hpages[level - 1]; + unsigned int hoffset = hoffsets[level - 1]; + + err = fsverity_hash_page(params, inode, req, hpage, real_hash); + if (err) + goto out; + err = cmp_hashes(vi, want_hash, real_hash, index, level - 1); + if (err) + goto out; + SetPageChecked(hpage); + extract_hash(hpage, hoffset, hsize, _want_hash); + want_hash = _want_hash; + put_page(hpage); + pr_debug("Verified hash page at level %d, now want %s:%*phN\n", + level - 1, params->hash_alg->name, hsize, want_hash); + } + + /* Finally, verify the data page */ + err = fsverity_hash_page(params, inode, req, data_page, real_hash); + if (err) + goto out; + err = cmp_hashes(vi, want_hash, real_hash, index, -1); +out: + for (; level > 0; level--) + put_page(hpages[level - 1]); + + return err == 0; +} + +/** + * fsverity_verify_page() - verify a data page + * + * Verify a page that has just been read from a verity file. The page must be a + * pagecache page that is still locked and not yet uptodate. + * + * Return: true if the page is valid, else false. + */ +bool fsverity_verify_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + const struct fsverity_info *vi = inode->i_verity_info; + struct ahash_request *req; + bool valid; + + req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + if (unlikely(!req)) + return false; + + valid = verify_page(inode, vi, req, page); + + ahash_request_free(req); + + return valid; +} +EXPORT_SYMBOL_GPL(fsverity_verify_page); + +#ifdef CONFIG_BLOCK +/** + * fsverity_verify_bio() - verify a 'read' bio that has just completed + * + * Verify a set of pages that have just been read from a verity file. The pages + * must be pagecache pages that are still locked and not yet uptodate. Pages + * that fail verification are set to the Error state. Verification is skipped + * for pages already in the Error state, e.g. due to fscrypt decryption failure. + * + * This is a helper function for use by the ->readpages() method of filesystems + * that issue bios to read data directly into the page cache. Filesystems that + * populate the page cache without issuing bios (e.g. non block-based + * filesystems) must instead call fsverity_verify_page() directly on each page. + * All filesystems must also call fsverity_verify_page() on holes. + */ +void fsverity_verify_bio(struct bio *bio) +{ + struct inode *inode = bio_first_page_all(bio)->mapping->host; + const struct fsverity_info *vi = inode->i_verity_info; + struct ahash_request *req; + struct bio_vec *bv; + struct bvec_iter_all iter_all; + + req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + if (unlikely(!req)) { + bio_for_each_segment_all(bv, bio, iter_all) + SetPageError(bv->bv_page); + return; + } + + bio_for_each_segment_all(bv, bio, iter_all) { + struct page *page = bv->bv_page; + + if (!PageError(page) && !verify_page(inode, vi, req, page)) + SetPageError(page); + } + + ahash_request_free(req); +} +EXPORT_SYMBOL_GPL(fsverity_verify_bio); +#endif /* CONFIG_BLOCK */ + +/** + * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue + * + * Enqueue verification work for asynchronous processing. + */ +void fsverity_enqueue_verify_work(struct work_struct *work) +{ + queue_work(fsverity_read_workqueue, work); +} +EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work); + +int __init fsverity_init_workqueue(void) +{ + /* + * Use an unbound workqueue to allow bios to be verified in parallel + * even when they happen to complete on the same CPU. This sacrifices + * locality, but it's worthwhile since hashing is CPU-intensive. + * + * Also use a high-priority workqueue to prioritize verification work, + * which blocks reads from completing, over regular application tasks. + */ + fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue", + WQ_UNBOUND | WQ_HIGHPRI, + num_online_cpus()); + if (!fsverity_read_workqueue) + return -ENOMEM; + return 0; +} + +void __init fsverity_exit_workqueue(void) +{ + destroy_workqueue(fsverity_read_workqueue); + fsverity_read_workqueue = NULL; +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 5dff77326cec..104a727f8a67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -64,6 +64,8 @@ struct workqueue_struct; struct iov_iter; struct fscrypt_info; struct fscrypt_operations; +struct fsverity_info; +struct fsverity_operations; struct fs_context; struct fs_parameter_description; @@ -723,6 +725,10 @@ struct inode { struct fscrypt_info *i_crypt_info; #endif +#ifdef CONFIG_FS_VERITY + struct fsverity_info *i_verity_info; +#endif + void *i_private; /* fs or device private pointer */ } __randomize_layout; @@ -1428,6 +1434,9 @@ struct super_block { #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct key *s_master_keys; /* master crypto keys in use */ +#endif +#ifdef CONFIG_FS_VERITY + const struct fsverity_operations *s_vop; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ @@ -1966,6 +1975,7 @@ struct super_operations { #endif #define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD 32768 /* Casefolded file */ +#define S_VERITY 65536 /* Verity file (using fs/verity/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2007,6 +2017,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_DAX(inode) ((inode)->i_flags & S_DAX) #define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD) +#define IS_VERITY(inode) ((inode)->i_flags & S_VERITY) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h new file mode 100644 index 000000000000..3b6b8ccebe7d --- /dev/null +++ b/include/linux/fsverity.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs-verity: read-only file-based authenticity protection + * + * This header declares the interface between the fs/verity/ support layer and + * filesystems that support fs-verity. + * + * Copyright 2019 Google LLC + */ + +#ifndef _LINUX_FSVERITY_H +#define _LINUX_FSVERITY_H + +#include +#include + +/* Verity operations for filesystems */ +struct fsverity_operations { + + /** + * Begin enabling verity on the given file. + * + * @filp: a readonly file descriptor for the file + * + * The filesystem must do any needed filesystem-specific preparations + * for enabling verity, e.g. evicting inline data. It also must return + * -EBUSY if verity is already being enabled on the given file. + * + * i_rwsem is held for write. + * + * Return: 0 on success, -errno on failure + */ + int (*begin_enable_verity)(struct file *filp); + + /** + * End enabling verity on the given file. + * + * @filp: a readonly file descriptor for the file + * @desc: the verity descriptor to write, or NULL on failure + * @desc_size: size of verity descriptor, or 0 on failure + * @merkle_tree_size: total bytes the Merkle tree took up + * + * If desc == NULL, then enabling verity failed and the filesystem only + * must do any necessary cleanups. Else, it must also store the given + * verity descriptor to a fs-specific location associated with the inode + * and do any fs-specific actions needed to mark the inode as a verity + * inode, e.g. setting a bit in the on-disk inode. The filesystem is + * also responsible for setting the S_VERITY flag in the VFS inode. + * + * i_rwsem is held for write, but it may have been dropped between + * ->begin_enable_verity() and ->end_enable_verity(). + * + * Return: 0 on success, -errno on failure + */ + int (*end_enable_verity)(struct file *filp, const void *desc, + size_t desc_size, u64 merkle_tree_size); + + /** + * Get the verity descriptor of the given inode. + * + * @inode: an inode with the S_VERITY flag set + * @buf: buffer in which to place the verity descriptor + * @bufsize: size of @buf, or 0 to retrieve the size only + * + * If bufsize == 0, then the size of the verity descriptor is returned. + * Otherwise the verity descriptor is written to 'buf' and its actual + * size is returned; -ERANGE is returned if it's too large. This may be + * called by multiple processes concurrently on the same inode. + * + * Return: the size on success, -errno on failure + */ + int (*get_verity_descriptor)(struct inode *inode, void *buf, + size_t bufsize); + + /** + * Read a Merkle tree page of the given inode. + * + * @inode: the inode + * @index: 0-based index of the page within the Merkle tree + * + * This can be called at any time on an open verity file, as well as + * between ->begin_enable_verity() and ->end_enable_verity(). It may be + * called by multiple processes concurrently, even with the same page. + * + * Note that this must retrieve a *page*, not necessarily a *block*. + * + * Return: the page on success, ERR_PTR() on failure + */ + struct page *(*read_merkle_tree_page)(struct inode *inode, + pgoff_t index); + + /** + * Write a Merkle tree block to the given inode. + * + * @inode: the inode for which the Merkle tree is being built + * @buf: block to write + * @index: 0-based index of the block within the Merkle tree + * @log_blocksize: log base 2 of the Merkle tree block size + * + * This is only called between ->begin_enable_verity() and + * ->end_enable_verity(). + * + * Return: 0 on success, -errno on failure + */ + int (*write_merkle_tree_block)(struct inode *inode, const void *buf, + u64 index, int log_blocksize); +}; + +#ifdef CONFIG_FS_VERITY + +static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) +{ + /* pairs with the cmpxchg() in fsverity_set_info() */ + return READ_ONCE(inode->i_verity_info); +} + +/* enable.c */ + +extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg); + +/* measure.c */ + +extern int fsverity_ioctl_measure(struct file *filp, void __user *arg); + +/* open.c */ + +extern int fsverity_file_open(struct inode *inode, struct file *filp); +extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); +extern void fsverity_cleanup_inode(struct inode *inode); + +/* verify.c */ + +extern bool fsverity_verify_page(struct page *page); +extern void fsverity_verify_bio(struct bio *bio); +extern void fsverity_enqueue_verify_work(struct work_struct *work); + +#else /* !CONFIG_FS_VERITY */ + +static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) +{ + return NULL; +} + +/* enable.c */ + +static inline int fsverity_ioctl_enable(struct file *filp, + const void __user *arg) +{ + return -EOPNOTSUPP; +} + +/* measure.c */ + +static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + +/* open.c */ + +static inline int fsverity_file_open(struct inode *inode, struct file *filp) +{ + return IS_VERITY(inode) ? -EOPNOTSUPP : 0; +} + +static inline int fsverity_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + return IS_VERITY(d_inode(dentry)) ? -EOPNOTSUPP : 0; +} + +static inline void fsverity_cleanup_inode(struct inode *inode) +{ +} + +/* verify.c */ + +static inline bool fsverity_verify_page(struct page *page) +{ + WARN_ON(1); + return false; +} + +static inline void fsverity_verify_bio(struct bio *bio) +{ + WARN_ON(1); +} + +static inline void fsverity_enqueue_verify_work(struct work_struct *work) +{ + WARN_ON(1); +} + +#endif /* !CONFIG_FS_VERITY */ + +/** + * fsverity_active() - do reads from the inode need to go through fs-verity? + * + * This checks whether ->i_verity_info has been set. + * + * Filesystems call this from ->readpages() to check whether the pages need to + * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to + * a race condition where the file is being read concurrently with + * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) + */ +static inline bool fsverity_active(const struct inode *inode) +{ + return fsverity_get_info(inode) != NULL; +} + +#endif /* _LINUX_FSVERITY_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 41bd84d25a98..aad225b05be7 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -258,6 +258,7 @@ struct fsxattr { #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ #define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_VERITY_FL 0x00100000 /* Verity protected inode */ #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ diff --git a/include/uapi/linux/fsverity.h b/include/uapi/linux/fsverity.h new file mode 100644 index 000000000000..da0daf6c193b --- /dev/null +++ b/include/uapi/linux/fsverity.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * fs-verity user API + * + * These ioctls can be used on filesystems that support fs-verity. See the + * "User API" section of Documentation/filesystems/fsverity.rst. + * + * Copyright 2019 Google LLC + */ +#ifndef _UAPI_LINUX_FSVERITY_H +#define _UAPI_LINUX_FSVERITY_H + +#include +#include + +#define FS_VERITY_HASH_ALG_SHA256 1 +#define FS_VERITY_HASH_ALG_SHA512 2 + +struct fsverity_enable_arg { + __u32 version; + __u32 hash_algorithm; + __u32 block_size; + __u32 salt_size; + __u64 salt_ptr; + __u32 sig_size; + __u32 __reserved1; + __u64 sig_ptr; + __u64 __reserved2[11]; +}; + +struct fsverity_digest { + __u16 digest_algorithm; + __u16 digest_size; /* input/output */ + __u8 digest[]; +}; + +#define FS_IOC_ENABLE_VERITY _IOW('f', 133, struct fsverity_enable_arg) +#define FS_IOC_MEASURE_VERITY _IOWR('f', 134, struct fsverity_digest) + +#endif /* _UAPI_LINUX_FSVERITY_H */