diff --git a/Documentation/crypto/api.rst b/Documentation/crypto/api.rst index 2e519193ab4a..b91b31736df8 100644 --- a/Documentation/crypto/api.rst +++ b/Documentation/crypto/api.rst @@ -1,15 +1,6 @@ Programming Interface ===================== -Please note that the kernel crypto API contains the AEAD givcrypt API -(crypto_aead_giv\* and aead_givcrypt\* function calls in -include/crypto/aead.h). This API is obsolete and will be removed in the -future. To obtain the functionality of an AEAD cipher with internal IV -generation, use the IV generator as a regular cipher. For example, -rfc4106(gcm(aes)) is the AEAD cipher with external IV generation and -seqniv(rfc4106(gcm(aes))) implies that the kernel crypto API generates -the IV. Different IV generators are available. - .. class:: toc-title Table of contents diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst index ca2d09b991f5..ee8ff0762d7f 100644 --- a/Documentation/crypto/architecture.rst +++ b/Documentation/crypto/architecture.rst @@ -157,10 +157,6 @@ applicable to a cipher, it is not displayed: - rng for random number generator - - givcipher for cipher with associated IV generator (see the geniv - entry below for the specification of the IV generator type used by - the cipher implementation) - - kpp for a Key-agreement Protocol Primitive (KPP) cipher such as an ECDH or DH implementation @@ -174,16 +170,7 @@ applicable to a cipher, it is not displayed: - digestsize: output size of the message digest -- geniv: IV generation type: - - - eseqiv for encrypted sequence number based IV generation - - - seqiv for sequence number based IV generation - - - chainiv for chain iv generation - - - is a marker that the cipher implements IV generation and - handling as it is specific to the given cipher +- geniv: IV generator (obsolete) Key Sizes --------- @@ -218,10 +205,6 @@ the aforementioned cipher types: - CRYPTO_ALG_TYPE_ABLKCIPHER Asynchronous multi-block cipher -- 
CRYPTO_ALG_TYPE_GIVCIPHER Asynchronous multi-block cipher packed - together with an IV generator (see geniv field in the /proc/crypto - listing for the known IV generators) - - CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as an ECDH or DH implementation @@ -338,18 +321,14 @@ uses the API applicable to the cipher type specified for the block. The following call sequence is applicable when the IPSEC layer triggers an encryption operation with the esp_output function. During -configuration, the administrator set up the use of rfc4106(gcm(aes)) as -the cipher for ESP. The following call sequence is now depicted in the -ASCII art above: +configuration, the administrator set up the use of seqiv(rfc4106(gcm(aes))) +as the cipher for ESP. The following call sequence is now depicted in +the ASCII art above: 1. esp_output() invokes crypto_aead_encrypt() to trigger an encryption operation of the AEAD cipher with IV generator. - In case of GCM, the SEQIV implementation is registered as GIVCIPHER - in crypto_rfc4106_alloc(). - - The SEQIV performs its operation to generate an IV where the core - function is seqiv_geniv(). + The SEQIV generates the IV. 2. Now, SEQIV uses the AEAD API function calls to invoke the associated AEAD cipher. In our case, during the instantiation of SEQIV, the diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt index 999fb2a810f6..6130e6eb4af8 100644 --- a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt +++ b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt @@ -1,8 +1,12 @@ Arm TrustZone CryptoCell cryptographic engine Required properties: -- compatible: Should be one of: "arm,cryptocell-712-ree", - "arm,cryptocell-710-ree" or "arm,cryptocell-630p-ree". 
+- compatible: Should be one of - + "arm,cryptocell-713-ree" + "arm,cryptocell-703-ree" + "arm,cryptocell-712-ree" + "arm,cryptocell-710-ree" + "arm,cryptocell-630p-ree" - reg: Base physical address of the engine and length of memory mapped region. - interrupts: Interrupt number for the device. diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt index 76a0b4e80e83..4e4d387e38a5 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt @@ -6,6 +6,8 @@ Required properties: - interrupts : Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ must be supplied, optionally Secure IRQ can be present, but is currently not implemented and not used. +- clocks : Clock reference (only required on some SOCs: 6ull and 6sll). +- clock-names : Must be "dcp". Example: diff --git a/MAINTAINERS b/MAINTAINERS index db3f690eb590..7a9804a891fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3484,6 +3484,7 @@ F: include/linux/spi/cc2520.h F: Documentation/devicetree/bindings/net/ieee802154/cc2520.txt CCREE ARM TRUSTZONE CRYPTOCELL REE DRIVER +M: Yael Chemla M: Gilad Ben-Yossef L: linux-crypto@vger.kernel.org S: Supported @@ -7147,7 +7148,9 @@ F: crypto/842.c F: lib/842/ IBM Power in-Nest Crypto Acceleration -M: Paulo Flabiano Smorigo +M: Breno Leitão +M: Nayna Jain +M: Paulo Flabiano Smorigo L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/nx/Makefile @@ -7211,7 +7214,9 @@ S: Supported F: drivers/scsi/ibmvscsi_tgt/ IBM Power VMX Cryptographic instructions -M: Paulo Flabiano Smorigo +M: Breno Leitão +M: Nayna Jain +M: Paulo Flabiano Smorigo L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/vmx/Makefile diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index ef0c7feea6e2..a95322b59799 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -69,6 +69,15 @@ config CRYPTO_AES_ARM help Use optimized 
AES assembler routines for ARM platforms. + On ARM processors without the Crypto Extensions, this is the + fastest AES implementation for single blocks. For multiple + blocks, the NEON bit-sliced implementation is usually faster. + + This implementation may be vulnerable to cache timing attacks, + since it uses lookup tables. However, as countermeasures it + disables IRQs and preloads the tables; it is hoped this makes + such attacks very difficult. + config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON @@ -117,9 +126,14 @@ config CRYPTO_CRC32_ARM_CE select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 symmetric cipher" + tristate "NEON accelerated ChaCha stream cipher algorithms" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 +config CRYPTO_NHPOLY1305_NEON + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" + depends on KERNEL_MODE_NEON + select CRYPTO_NHPOLY1305 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index bd5bceef0605..b65d6bfab8e6 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -9,7 +9,8 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -52,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +chacha-neon-y := chacha-neon-core.o 
chacha-neon-glue.o +nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index d0a9cec73707..5affb8482379 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -10,7 +10,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 184d6c2d15d5..f2d67c095e59 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -10,6 +10,7 @@ */ #include +#include #include .text @@ -41,7 +42,7 @@ .endif .endm - .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr __select \out0, \in0, 0 __select t0, \in1, 1 __load \out0, \out0, 0, \sz, \op @@ -73,6 +74,14 @@ __load t0, t0, 3, \sz, \op __load \t4, \t4, 3, \sz, \op + .ifnb \oldcpsr + /* + * This is the final round and we're done with all data-dependent table + * lookups, so we can safely re-enable interrupts. 
+ */ + restore_irqs \oldcpsr + .endif + eor \out1, \out1, t1, ror #24 eor \out0, \out0, t2, ror #16 ldm rk!, {t1, t2} @@ -83,14 +92,14 @@ eor \out1, \out1, t2 .endm - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr .endm - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr .endm .macro __rev, out, in @@ -118,13 +127,14 @@ .macro do_crypt, round, ttab, ltab, bsz push {r3-r11, lr} + // Load keys first, to reduce latency in case they're not cached yet. + ldm rk!, {r8-r11} + ldr r4, [in] ldr r5, [in, #4] ldr r6, [in, #8] ldr r7, [in, #12] - ldm rk!, {r8-r11} - #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 __rev r5, r5 @@ -138,6 +148,25 @@ eor r7, r7, r11 __adrl ttab, \ttab + /* + * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into + * L1 cache, assuming cacheline size >= 32. This is a hardening measure + * intended to make cache-timing attacks more difficult. They may not + * be fully prevented, however; see the paper + * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf + * ("Cache-timing attacks on AES") for a discussion of the many + * difficulties involved in writing truly constant-time AES software. 
+ */ + save_and_disable_irqs t0 + .set i, 0 + .rept 1024 / 128 + ldr r8, [ttab, #i + 0] + ldr r9, [ttab, #i + 32] + ldr r10, [ttab, #i + 64] + ldr r11, [ttab, #i + 96] + .set i, i + 128 + .endr + push {t0} // oldcpsr tst rounds, #2 bne 1f @@ -151,8 +180,21 @@ \round r4, r5, r6, r7, r8, r9, r10, r11 b 0b -2: __adrl ttab, \ltab - \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b +2: .ifb \ltab + add ttab, ttab, #1 + .else + __adrl ttab, \ltab + // Prefetch inverse S-box for final round; see explanation above + .set i, 0 + .rept 256 / 64 + ldr t0, [ttab, #i + 0] + ldr t1, [ttab, #i + 32] + .set i, i + 64 + .endr + .endif + + pop {rounds} // oldcpsr + \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 @@ -175,7 +217,7 @@ .endm ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 + do_crypt fround, crypto_ft_tab,, 2 ENDPROC(__aes_arm_encrypt) .align 5 diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha-neon-core.S similarity index 90% rename from arch/arm/crypto/chacha20-neon-core.S rename to arch/arm/crypto/chacha-neon-core.S index 50e7b9896818..eb22926d4912 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha-neon-core.S @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * ChaCha/XChaCha NEON helper functions * * Copyright (C) 2016 Linaro, Ltd. * @@ -27,9 +27,9 @@ * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, * needs index vector) * - * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit - * rotations, the only choices are (a) and (b). We use (a) since it takes - * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53. + * ChaCha has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit rotations, + * the only choices are (a) and (b). We use (a) since it takes two-thirds the + * cycles of (b) on both Cortex-A7 and Cortex-A53. 
* * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest * and doesn't need a temporary register. @@ -52,30 +52,20 @@ .fpu neon .align 5 -ENTRY(chacha20_block_xor_neon) - // r0: Input state matrix, s - // r1: 1 data block output, o - // r2: 1 data block input, i - - // - // This function encrypts one ChaCha20 block by loading the state matrix - // in four NEON registers. It performs matrix operation on four words in - // parallel, but requireds shuffling to rearrange the words after each - // round. - // - - // x0..3 = s0..3 - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - vmov q8, q0 - vmov q9, q1 - vmov q10, q2 - vmov q11, q3 +/* + * chacha_permute - permute one block + * + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers q0-q3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + * + * The round count is given in r3. + * + * Clobbers: r3, ip, q4-q5 + */ +chacha_permute: adr ip, .Lrol8_table - mov r3, #10 vld1.8 {d10}, [ip, :64] .Ldoubleround: @@ -139,9 +129,31 @@ ENTRY(chacha20_block_xor_neon) // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) vext.8 q3, q3, q3, #4 - subs r3, r3, #1 + subs r3, r3, #2 bne .Ldoubleround + bx lr +ENDPROC(chacha_permute) + +ENTRY(chacha_block_xor_neon) + // r0: Input state matrix, s + // r1: 1 data block output, o + // r2: 1 data block input, i + // r3: nrounds + push {lr} + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + bl chacha_permute + add ip, r2, #0x20 vld1.8 {q4-q5}, [r2] vld1.8 {q6-q7}, [ip] @@ -166,15 +178,33 @@ ENTRY(chacha20_block_xor_neon) vst1.8 {q0-q1}, [r1] vst1.8 {q2-q3}, [ip] - bx lr -ENDPROC(chacha20_block_xor_neon) + pop {pc} +ENDPROC(chacha_block_xor_neon) + +ENTRY(hchacha_block_neon) + // r0: Input state matrix, s + // r1: output (8 32-bit words) + // r2: nrounds 
+ push {lr} + + vld1.32 {q0-q1}, [r0]! + vld1.32 {q2-q3}, [r0] + + mov r3, r2 + bl chacha_permute + + vst1.32 {q0}, [r1]! + vst1.32 {q3}, [r1] + + pop {pc} +ENDPROC(hchacha_block_neon) .align 4 .Lctrinc: .word 0, 1, 2, 3 .Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 .align 5 -ENTRY(chacha20_4block_xor_neon) +ENTRY(chacha_4block_xor_neon) push {r4-r5} mov r4, sp // preserve the stack pointer sub ip, sp, #0x20 // allocate a 32 byte buffer @@ -184,9 +214,10 @@ ENTRY(chacha20_4block_xor_neon) // r0: Input state matrix, s // r1: 4 data blocks output, o // r2: 4 data blocks input, i + // r3: nrounds // - // This function encrypts four consecutive ChaCha20 blocks by loading + // This function encrypts four consecutive ChaCha blocks by loading // the state matrix in NEON registers four times. The algorithm performs // each operation on the corresponding word of each state matrix, hence // requires no word shuffling. The words are re-interleaved before the @@ -219,7 +250,6 @@ ENTRY(chacha20_4block_xor_neon) vdup.32 q0, d0[0] adr ip, .Lrol8_table - mov r3, #10 b 1f .Ldoubleround4: @@ -417,7 +447,7 @@ ENTRY(chacha20_4block_xor_neon) vsri.u32 q5, q8, #25 vsri.u32 q6, q9, #25 - subs r3, r3, #1 + subs r3, r3, #2 bne .Ldoubleround4 // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. @@ -527,4 +557,4 @@ ENTRY(chacha20_4block_xor_neon) pop {r4-r5} bx lr -ENDPROC(chacha20_4block_xor_neon) +ENDPROC(chacha_4block_xor_neon) diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c new file mode 100644 index 000000000000..9d6fda81986d --- /dev/null +++ b/arch/arm/crypto/chacha-neon-glue.c @@ -0,0 +1,201 @@ +/* + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, bytes); + } +} + +static int chacha_neon_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + crypto_chacha_init(state, ctx, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + 
} + + return err; +} + +static int chacha_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); + + return chacha_neon_stream_xor(req, ctx, req->iv); +} + +static int xchacha_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_xchacha_crypt(req); + + crypto_chacha_init(state, ctx, req->iv); + + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_neon_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha_neon, + .decrypt = chacha_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, 
+ }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-neon"); diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c deleted file mode 100644 index 59a7be08e80c..000000000000 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); - -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 buf[CHACHA20_BLOCK_SIZE]; - - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { - chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha20_block_xor_neon(state, buf, buf); - memcpy(dst, buf, bytes); - } -} - -static int chacha20_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(req); - - err = skcipher_walk_virt(&walk, req, true); - - crypto_chacha20_init(state, ctx, walk.iv); - - kernel_neon_begin(); - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, 
walk.nbytes - nbytes); - } - kernel_neon_end(); - - return err; -} - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .walksize = 4 * CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_neon, - .decrypt = chacha20_neon, -}; - -static int __init chacha20_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_skcipher(&alg); -} - -static void __exit chacha20_simd_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S new file mode 100644 index 000000000000..434d80ab531c --- /dev/null +++ b/arch/arm/crypto/nh-neon-core.S @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, NEON accelerated version + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + + .text + .fpu neon + + KEY .req r0 + MESSAGE .req r1 + MESSAGE_LEN .req r2 + HASH .req r3 + + PASS0_SUMS .req q0 + PASS0_SUM_A .req d0 + PASS0_SUM_B .req d1 + PASS1_SUMS .req q1 + PASS1_SUM_A .req d2 + PASS1_SUM_B .req d3 + PASS2_SUMS .req q2 + PASS2_SUM_A .req d4 + PASS2_SUM_B .req d5 + PASS3_SUMS .req q3 + PASS3_SUM_A .req d6 + PASS3_SUM_B .req d7 + K0 .req q4 + K1 .req q5 + K2 .req q6 + K3 .req q7 + T0 .req q8 + T0_L .req d16 + T0_H .req d17 + T1 .req q9 + T1_L .req d18 + T1_H .req d19 + T2 .req q10 + T2_L .req d20 + T2_H .req d21 + T3 .req q11 + T3_L .req d22 + T3_H .req d23 + 
+.macro _nh_stride k0, k1, k2, k3 + + // Load next message stride + vld1.8 {T3}, [MESSAGE]! + + // Load next key stride + vld1.32 {\k3}, [KEY]! + + // Add message words to key words + vadd.u32 T0, T3, \k0 + vadd.u32 T1, T3, \k1 + vadd.u32 T2, T3, \k2 + vadd.u32 T3, T3, \k3 + + // Multiply 32x32 => 64 and accumulate + vmlal.u32 PASS0_SUMS, T0_L, T0_H + vmlal.u32 PASS1_SUMS, T1_L, T1_H + vmlal.u32 PASS2_SUMS, T2_L, T2_H + vmlal.u32 PASS3_SUMS, T3_L, T3_H +.endm + +/* + * void nh_neon(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +ENTRY(nh_neon) + + vld1.32 {K0,K1}, [KEY]! + vmov.u64 PASS0_SUMS, #0 + vmov.u64 PASS1_SUMS, #0 + vld1.32 {K2}, [KEY]! + vmov.u64 PASS2_SUMS, #0 + vmov.u64 PASS3_SUMS, #0 + + subs MESSAGE_LEN, MESSAGE_LEN, #64 + blt .Lloop4_done +.Lloop4: + _nh_stride K0, K1, K2, K3 + _nh_stride K1, K2, K3, K0 + _nh_stride K2, K3, K0, K1 + _nh_stride K3, K0, K1, K2 + subs MESSAGE_LEN, MESSAGE_LEN, #64 + bge .Lloop4 + +.Lloop4_done: + ands MESSAGE_LEN, MESSAGE_LEN, #63 + beq .Ldone + _nh_stride K0, K1, K2, K3 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K1, K2, K3, K0 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K2, K3, K0, K1 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + vadd.u64 T0_L, PASS0_SUM_A, PASS0_SUM_B + vadd.u64 T0_H, PASS1_SUM_A, PASS1_SUM_B + vadd.u64 T1_L, PASS2_SUM_A, PASS2_SUM_B + vadd.u64 T1_H, PASS3_SUM_A, PASS3_SUM_B + vst1.8 {T0-T1}, [HASH] + bx lr +ENDPROC(nh_neon) diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c new file mode 100644 index 000000000000..49aae87cb2bc --- /dev/null +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (NEON accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include 
+#include +#include +#include +#include + +asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_neon(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_neon_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !may_use_simd()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + + kernel_neon_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + kernel_neon_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-neon", + .base.cra_priority = 200, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_neon_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-neon"); diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index a5606823ed4d..d9a523ecdd83 100644 --- a/arch/arm64/crypto/Kconfig +++ 
b/arch/arm64/crypto/Kconfig @@ -101,11 +101,16 @@ config CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_SIMD config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 symmetric cipher" + tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 +config CRYPTO_NHPOLY1305_NEON + tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" + depends on KERNEL_MODE_NEON + select CRYPTO_NHPOLY1305 + config CRYPTO_AES_ARM64_BS tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index f476fede09ba..a4ffd9fe3265 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -50,8 +50,11 @@ sha256-arm64-y := sha256-glue.o sha256-core.o obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o + +obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o +nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S similarity index 52% rename from arch/arm64/crypto/chacha20-neon-core.S rename to arch/arm64/crypto/chacha-neon-core.S index 13c85e272c2a..021bb9e9784b 100644 --- a/arch/arm64/crypto/chacha20-neon-core.S +++ b/arch/arm64/crypto/chacha-neon-core.S @@ -1,13 +1,13 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * ChaCha/XChaCha NEON helper functions * - * Copyright (C) 2016 Linaro, Ltd. + * Copyright (C) 2016-2018 Linaro, Ltd. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Based on: + * Originally based on: * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions * * Copyright (C) 2015 Martin Willi @@ -19,29 +19,27 @@ */ #include +#include +#include .text .align 6 -ENTRY(chacha20_block_xor_neon) - // x0: Input state matrix, s - // x1: 1 data block output, o - // x2: 1 data block input, i +/* + * chacha_permute - permute one block + * + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers v0-v3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + * + * The round count is given in w3. + * + * Clobbers: w3, x10, v4, v12 + */ +chacha_permute: - // - // This function encrypts one ChaCha20 block by loading the state matrix - // in four NEON registers. It performs matrix operation on four words in - // parallel, but requires shuffling to rearrange the words after each - // round. - // - - // x0..3 = s0..3 - adr x3, ROT8 - ld1 {v0.4s-v3.4s}, [x0] - ld1 {v8.4s-v11.4s}, [x0] - ld1 {v12.4s}, [x3] - - mov x3, #10 + adr_l x10, ROT8 + ld1 {v12.4s}, [x10] .Ldoubleround: // x0 += x1, x3 = rotl32(x3 ^ x0, 16) @@ -102,9 +100,27 @@ ENTRY(chacha20_block_xor_neon) // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) ext v3.16b, v3.16b, v3.16b, #4 - subs x3, x3, #1 + subs w3, w3, #2 b.ne .Ldoubleround + ret +ENDPROC(chacha_permute) + +ENTRY(chacha_block_xor_neon) + // x0: Input state matrix, s + // x1: 1 data block output, o + // x2: 1 data block input, i + // w3: nrounds + + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + // x0..3 = s0..3 + ld1 {v0.4s-v3.4s}, [x0] + ld1 {v8.4s-v11.4s}, [x0] + + bl chacha_permute + ld1 {v4.16b-v7.16b}, [x2] // o0 = i0 ^ (x0 + s0) @@ -125,71 +141,156 @@ ENTRY(chacha20_block_xor_neon) st1 {v0.16b-v3.16b}, [x1] + ldp x29, x30, [sp], #16 ret -ENDPROC(chacha20_block_xor_neon) +ENDPROC(chacha_block_xor_neon) + +ENTRY(hchacha_block_neon) + // x0: Input state matrix, s + // x1: output (8 32-bit words) + // w2: nrounds + + stp x29, x30, [sp, #-16]! + mov x29, sp + + ld1 {v0.4s-v3.4s}, [x0] + + mov w3, w2 + bl chacha_permute + + st1 {v0.16b}, [x1], #16 + st1 {v3.16b}, [x1] + + ldp x29, x30, [sp], #16 + ret +ENDPROC(hchacha_block_neon) + + a0 .req w12 + a1 .req w13 + a2 .req w14 + a3 .req w15 + a4 .req w16 + a5 .req w17 + a6 .req w19 + a7 .req w20 + a8 .req w21 + a9 .req w22 + a10 .req w23 + a11 .req w24 + a12 .req w25 + a13 .req w26 + a14 .req w27 + a15 .req w28 .align 6 -ENTRY(chacha20_4block_xor_neon) +ENTRY(chacha_4block_xor_neon) + frame_push 10 + // x0: Input state matrix, s // x1: 4 data blocks output, o // x2: 4 data blocks input, i + // w3: nrounds + // x4: byte count + + adr_l x10, .Lpermute + and x5, x4, #63 + add x10, x10, x5 + add x11, x10, #64 // - // This function encrypts four consecutive ChaCha20 blocks by loading + // This function encrypts four consecutive ChaCha blocks by loading // the state matrix in NEON registers four times. The algorithm performs // each operation on the corresponding word of each state matrix, hence // requires no word shuffling. For final XORing step we transpose the // matrix by interleaving 32- and then 64-bit words, which allows us to // do XOR in NEON registers. // - adr x3, CTRINC // ... and ROT8 - ld1 {v30.4s-v31.4s}, [x3] + // At the same time, a fifth block is encrypted in parallel using + // scalar registers + // + adr_l x9, CTRINC // ... 
and ROT8 + ld1 {v30.4s-v31.4s}, [x9] // x0..15[0-3] = s0..3[0..3] - mov x4, x0 - ld4r { v0.4s- v3.4s}, [x4], #16 - ld4r { v4.4s- v7.4s}, [x4], #16 - ld4r { v8.4s-v11.4s}, [x4], #16 - ld4r {v12.4s-v15.4s}, [x4] + add x8, x0, #16 + ld4r { v0.4s- v3.4s}, [x0] + ld4r { v4.4s- v7.4s}, [x8], #16 + ld4r { v8.4s-v11.4s}, [x8], #16 + ld4r {v12.4s-v15.4s}, [x8] - // x12 += counter values 0-3 + mov a0, v0.s[0] + mov a1, v1.s[0] + mov a2, v2.s[0] + mov a3, v3.s[0] + mov a4, v4.s[0] + mov a5, v5.s[0] + mov a6, v6.s[0] + mov a7, v7.s[0] + mov a8, v8.s[0] + mov a9, v9.s[0] + mov a10, v10.s[0] + mov a11, v11.s[0] + mov a12, v12.s[0] + mov a13, v13.s[0] + mov a14, v14.s[0] + mov a15, v15.s[0] + + // x12 += counter values 1-4 add v12.4s, v12.4s, v30.4s - mov x3, #10 - .Ldoubleround4: // x0 += x4, x12 = rotl32(x12 ^ x0, 16) // x1 += x5, x13 = rotl32(x13 ^ x1, 16) // x2 += x6, x14 = rotl32(x14 ^ x2, 16) // x3 += x7, x15 = rotl32(x15 ^ x3, 16) add v0.4s, v0.4s, v4.4s + add a0, a0, a4 add v1.4s, v1.4s, v5.4s + add a1, a1, a5 add v2.4s, v2.4s, v6.4s + add a2, a2, a6 add v3.4s, v3.4s, v7.4s + add a3, a3, a7 eor v12.16b, v12.16b, v0.16b + eor a12, a12, a0 eor v13.16b, v13.16b, v1.16b + eor a13, a13, a1 eor v14.16b, v14.16b, v2.16b + eor a14, a14, a2 eor v15.16b, v15.16b, v3.16b + eor a15, a15, a3 rev32 v12.8h, v12.8h + ror a12, a12, #16 rev32 v13.8h, v13.8h + ror a13, a13, #16 rev32 v14.8h, v14.8h + ror a14, a14, #16 rev32 v15.8h, v15.8h + ror a15, a15, #16 // x8 += x12, x4 = rotl32(x4 ^ x8, 12) // x9 += x13, x5 = rotl32(x5 ^ x9, 12) // x10 += x14, x6 = rotl32(x6 ^ x10, 12) // x11 += x15, x7 = rotl32(x7 ^ x11, 12) add v8.4s, v8.4s, v12.4s + add a8, a8, a12 add v9.4s, v9.4s, v13.4s + add a9, a9, a13 add v10.4s, v10.4s, v14.4s + add a10, a10, a14 add v11.4s, v11.4s, v15.4s + add a11, a11, a15 eor v16.16b, v4.16b, v8.16b + eor a4, a4, a8 eor v17.16b, v5.16b, v9.16b + eor a5, a5, a9 eor v18.16b, v6.16b, v10.16b + eor a6, a6, a10 eor v19.16b, v7.16b, v11.16b + eor a7, a7, a11 shl v4.4s, v16.4s, 
#12 shl v5.4s, v17.4s, #12 @@ -197,42 +298,66 @@ ENTRY(chacha20_4block_xor_neon) shl v7.4s, v19.4s, #12 sri v4.4s, v16.4s, #20 + ror a4, a4, #20 sri v5.4s, v17.4s, #20 + ror a5, a5, #20 sri v6.4s, v18.4s, #20 + ror a6, a6, #20 sri v7.4s, v19.4s, #20 + ror a7, a7, #20 // x0 += x4, x12 = rotl32(x12 ^ x0, 8) // x1 += x5, x13 = rotl32(x13 ^ x1, 8) // x2 += x6, x14 = rotl32(x14 ^ x2, 8) // x3 += x7, x15 = rotl32(x15 ^ x3, 8) add v0.4s, v0.4s, v4.4s + add a0, a0, a4 add v1.4s, v1.4s, v5.4s + add a1, a1, a5 add v2.4s, v2.4s, v6.4s + add a2, a2, a6 add v3.4s, v3.4s, v7.4s + add a3, a3, a7 eor v12.16b, v12.16b, v0.16b + eor a12, a12, a0 eor v13.16b, v13.16b, v1.16b + eor a13, a13, a1 eor v14.16b, v14.16b, v2.16b + eor a14, a14, a2 eor v15.16b, v15.16b, v3.16b + eor a15, a15, a3 tbl v12.16b, {v12.16b}, v31.16b + ror a12, a12, #24 tbl v13.16b, {v13.16b}, v31.16b + ror a13, a13, #24 tbl v14.16b, {v14.16b}, v31.16b + ror a14, a14, #24 tbl v15.16b, {v15.16b}, v31.16b + ror a15, a15, #24 // x8 += x12, x4 = rotl32(x4 ^ x8, 7) // x9 += x13, x5 = rotl32(x5 ^ x9, 7) // x10 += x14, x6 = rotl32(x6 ^ x10, 7) // x11 += x15, x7 = rotl32(x7 ^ x11, 7) add v8.4s, v8.4s, v12.4s + add a8, a8, a12 add v9.4s, v9.4s, v13.4s + add a9, a9, a13 add v10.4s, v10.4s, v14.4s + add a10, a10, a14 add v11.4s, v11.4s, v15.4s + add a11, a11, a15 eor v16.16b, v4.16b, v8.16b + eor a4, a4, a8 eor v17.16b, v5.16b, v9.16b + eor a5, a5, a9 eor v18.16b, v6.16b, v10.16b + eor a6, a6, a10 eor v19.16b, v7.16b, v11.16b + eor a7, a7, a11 shl v4.4s, v16.4s, #7 shl v5.4s, v17.4s, #7 @@ -240,42 +365,66 @@ ENTRY(chacha20_4block_xor_neon) shl v7.4s, v19.4s, #7 sri v4.4s, v16.4s, #25 + ror a4, a4, #25 sri v5.4s, v17.4s, #25 + ror a5, a5, #25 sri v6.4s, v18.4s, #25 + ror a6, a6, #25 sri v7.4s, v19.4s, #25 + ror a7, a7, #25 // x0 += x5, x15 = rotl32(x15 ^ x0, 16) // x1 += x6, x12 = rotl32(x12 ^ x1, 16) // x2 += x7, x13 = rotl32(x13 ^ x2, 16) // x3 += x4, x14 = rotl32(x14 ^ x3, 16) add v0.4s, v0.4s, v5.4s + add a0, a0, a5 add 
v1.4s, v1.4s, v6.4s + add a1, a1, a6 add v2.4s, v2.4s, v7.4s + add a2, a2, a7 add v3.4s, v3.4s, v4.4s + add a3, a3, a4 eor v15.16b, v15.16b, v0.16b + eor a15, a15, a0 eor v12.16b, v12.16b, v1.16b + eor a12, a12, a1 eor v13.16b, v13.16b, v2.16b + eor a13, a13, a2 eor v14.16b, v14.16b, v3.16b + eor a14, a14, a3 rev32 v15.8h, v15.8h + ror a15, a15, #16 rev32 v12.8h, v12.8h + ror a12, a12, #16 rev32 v13.8h, v13.8h + ror a13, a13, #16 rev32 v14.8h, v14.8h + ror a14, a14, #16 // x10 += x15, x5 = rotl32(x5 ^ x10, 12) // x11 += x12, x6 = rotl32(x6 ^ x11, 12) // x8 += x13, x7 = rotl32(x7 ^ x8, 12) // x9 += x14, x4 = rotl32(x4 ^ x9, 12) add v10.4s, v10.4s, v15.4s + add a10, a10, a15 add v11.4s, v11.4s, v12.4s + add a11, a11, a12 add v8.4s, v8.4s, v13.4s + add a8, a8, a13 add v9.4s, v9.4s, v14.4s + add a9, a9, a14 eor v16.16b, v5.16b, v10.16b + eor a5, a5, a10 eor v17.16b, v6.16b, v11.16b + eor a6, a6, a11 eor v18.16b, v7.16b, v8.16b + eor a7, a7, a8 eor v19.16b, v4.16b, v9.16b + eor a4, a4, a9 shl v5.4s, v16.4s, #12 shl v6.4s, v17.4s, #12 @@ -283,42 +432,66 @@ ENTRY(chacha20_4block_xor_neon) shl v4.4s, v19.4s, #12 sri v5.4s, v16.4s, #20 + ror a5, a5, #20 sri v6.4s, v17.4s, #20 + ror a6, a6, #20 sri v7.4s, v18.4s, #20 + ror a7, a7, #20 sri v4.4s, v19.4s, #20 + ror a4, a4, #20 // x0 += x5, x15 = rotl32(x15 ^ x0, 8) // x1 += x6, x12 = rotl32(x12 ^ x1, 8) // x2 += x7, x13 = rotl32(x13 ^ x2, 8) // x3 += x4, x14 = rotl32(x14 ^ x3, 8) add v0.4s, v0.4s, v5.4s + add a0, a0, a5 add v1.4s, v1.4s, v6.4s + add a1, a1, a6 add v2.4s, v2.4s, v7.4s + add a2, a2, a7 add v3.4s, v3.4s, v4.4s + add a3, a3, a4 eor v15.16b, v15.16b, v0.16b + eor a15, a15, a0 eor v12.16b, v12.16b, v1.16b + eor a12, a12, a1 eor v13.16b, v13.16b, v2.16b + eor a13, a13, a2 eor v14.16b, v14.16b, v3.16b + eor a14, a14, a3 tbl v15.16b, {v15.16b}, v31.16b + ror a15, a15, #24 tbl v12.16b, {v12.16b}, v31.16b + ror a12, a12, #24 tbl v13.16b, {v13.16b}, v31.16b + ror a13, a13, #24 tbl v14.16b, {v14.16b}, v31.16b + ror a14, 
a14, #24 // x10 += x15, x5 = rotl32(x5 ^ x10, 7) // x11 += x12, x6 = rotl32(x6 ^ x11, 7) // x8 += x13, x7 = rotl32(x7 ^ x8, 7) // x9 += x14, x4 = rotl32(x4 ^ x9, 7) add v10.4s, v10.4s, v15.4s + add a10, a10, a15 add v11.4s, v11.4s, v12.4s + add a11, a11, a12 add v8.4s, v8.4s, v13.4s + add a8, a8, a13 add v9.4s, v9.4s, v14.4s + add a9, a9, a14 eor v16.16b, v5.16b, v10.16b + eor a5, a5, a10 eor v17.16b, v6.16b, v11.16b + eor a6, a6, a11 eor v18.16b, v7.16b, v8.16b + eor a7, a7, a8 eor v19.16b, v4.16b, v9.16b + eor a4, a4, a9 shl v5.4s, v16.4s, #7 shl v6.4s, v17.4s, #7 @@ -326,11 +499,15 @@ ENTRY(chacha20_4block_xor_neon) shl v4.4s, v19.4s, #7 sri v5.4s, v16.4s, #25 + ror a5, a5, #25 sri v6.4s, v17.4s, #25 + ror a6, a6, #25 sri v7.4s, v18.4s, #25 + ror a7, a7, #25 sri v4.4s, v19.4s, #25 + ror a4, a4, #25 - subs x3, x3, #1 + subs w3, w3, #2 b.ne .Ldoubleround4 ld4r {v16.4s-v19.4s}, [x0], #16 @@ -344,9 +521,17 @@ ENTRY(chacha20_4block_xor_neon) // x2[0-3] += s0[2] // x3[0-3] += s0[3] add v0.4s, v0.4s, v16.4s + mov w6, v16.s[0] + mov w7, v17.s[0] add v1.4s, v1.4s, v17.4s + mov w8, v18.s[0] + mov w9, v19.s[0] add v2.4s, v2.4s, v18.4s + add a0, a0, w6 + add a1, a1, w7 add v3.4s, v3.4s, v19.4s + add a2, a2, w8 + add a3, a3, w9 ld4r {v24.4s-v27.4s}, [x0], #16 ld4r {v28.4s-v31.4s}, [x0] @@ -356,95 +541,304 @@ ENTRY(chacha20_4block_xor_neon) // x6[0-3] += s1[2] // x7[0-3] += s1[3] add v4.4s, v4.4s, v20.4s + mov w6, v20.s[0] + mov w7, v21.s[0] add v5.4s, v5.4s, v21.4s + mov w8, v22.s[0] + mov w9, v23.s[0] add v6.4s, v6.4s, v22.4s + add a4, a4, w6 + add a5, a5, w7 add v7.4s, v7.4s, v23.4s + add a6, a6, w8 + add a7, a7, w9 // x8[0-3] += s2[0] // x9[0-3] += s2[1] // x10[0-3] += s2[2] // x11[0-3] += s2[3] add v8.4s, v8.4s, v24.4s + mov w6, v24.s[0] + mov w7, v25.s[0] add v9.4s, v9.4s, v25.4s + mov w8, v26.s[0] + mov w9, v27.s[0] add v10.4s, v10.4s, v26.4s + add a8, a8, w6 + add a9, a9, w7 add v11.4s, v11.4s, v27.4s + add a10, a10, w8 + add a11, a11, w9 // x12[0-3] += s3[0] // 
x13[0-3] += s3[1] // x14[0-3] += s3[2] // x15[0-3] += s3[3] add v12.4s, v12.4s, v28.4s + mov w6, v28.s[0] + mov w7, v29.s[0] add v13.4s, v13.4s, v29.4s + mov w8, v30.s[0] + mov w9, v31.s[0] add v14.4s, v14.4s, v30.4s + add a12, a12, w6 + add a13, a13, w7 add v15.4s, v15.4s, v31.4s + add a14, a14, w8 + add a15, a15, w9 // interleave 32-bit words in state n, n+1 + ldp w6, w7, [x2], #64 zip1 v16.4s, v0.4s, v1.4s + ldp w8, w9, [x2, #-56] + eor a0, a0, w6 zip2 v17.4s, v0.4s, v1.4s + eor a1, a1, w7 zip1 v18.4s, v2.4s, v3.4s + eor a2, a2, w8 zip2 v19.4s, v2.4s, v3.4s + eor a3, a3, w9 + ldp w6, w7, [x2, #-48] zip1 v20.4s, v4.4s, v5.4s + ldp w8, w9, [x2, #-40] + eor a4, a4, w6 zip2 v21.4s, v4.4s, v5.4s + eor a5, a5, w7 zip1 v22.4s, v6.4s, v7.4s + eor a6, a6, w8 zip2 v23.4s, v6.4s, v7.4s + eor a7, a7, w9 + ldp w6, w7, [x2, #-32] zip1 v24.4s, v8.4s, v9.4s + ldp w8, w9, [x2, #-24] + eor a8, a8, w6 zip2 v25.4s, v8.4s, v9.4s + eor a9, a9, w7 zip1 v26.4s, v10.4s, v11.4s + eor a10, a10, w8 zip2 v27.4s, v10.4s, v11.4s + eor a11, a11, w9 + ldp w6, w7, [x2, #-16] zip1 v28.4s, v12.4s, v13.4s + ldp w8, w9, [x2, #-8] + eor a12, a12, w6 zip2 v29.4s, v12.4s, v13.4s + eor a13, a13, w7 zip1 v30.4s, v14.4s, v15.4s + eor a14, a14, w8 zip2 v31.4s, v14.4s, v15.4s + eor a15, a15, w9 + + mov x3, #64 + subs x5, x4, #128 + add x6, x5, x2 + csel x3, x3, xzr, ge + csel x2, x2, x6, ge // interleave 64-bit words in state n, n+2 zip1 v0.2d, v16.2d, v18.2d zip2 v4.2d, v16.2d, v18.2d + stp a0, a1, [x1], #64 zip1 v8.2d, v17.2d, v19.2d zip2 v12.2d, v17.2d, v19.2d - ld1 {v16.16b-v19.16b}, [x2], #64 + stp a2, a3, [x1, #-56] + ld1 {v16.16b-v19.16b}, [x2], x3 + + subs x6, x4, #192 + ccmp x3, xzr, #4, lt + add x7, x6, x2 + csel x3, x3, xzr, eq + csel x2, x2, x7, eq zip1 v1.2d, v20.2d, v22.2d zip2 v5.2d, v20.2d, v22.2d + stp a4, a5, [x1, #-48] zip1 v9.2d, v21.2d, v23.2d zip2 v13.2d, v21.2d, v23.2d - ld1 {v20.16b-v23.16b}, [x2], #64 + stp a6, a7, [x1, #-40] + ld1 {v20.16b-v23.16b}, [x2], x3 + + subs x7, x4, #256 + 
ccmp x3, xzr, #4, lt + add x8, x7, x2 + csel x3, x3, xzr, eq + csel x2, x2, x8, eq zip1 v2.2d, v24.2d, v26.2d zip2 v6.2d, v24.2d, v26.2d + stp a8, a9, [x1, #-32] zip1 v10.2d, v25.2d, v27.2d zip2 v14.2d, v25.2d, v27.2d - ld1 {v24.16b-v27.16b}, [x2], #64 + stp a10, a11, [x1, #-24] + ld1 {v24.16b-v27.16b}, [x2], x3 + + subs x8, x4, #320 + ccmp x3, xzr, #4, lt + add x9, x8, x2 + csel x2, x2, x9, eq zip1 v3.2d, v28.2d, v30.2d zip2 v7.2d, v28.2d, v30.2d + stp a12, a13, [x1, #-16] zip1 v11.2d, v29.2d, v31.2d zip2 v15.2d, v29.2d, v31.2d + stp a14, a15, [x1, #-8] ld1 {v28.16b-v31.16b}, [x2] // xor with corresponding input, write to output + tbnz x5, #63, 0f eor v16.16b, v16.16b, v0.16b eor v17.16b, v17.16b, v1.16b eor v18.16b, v18.16b, v2.16b eor v19.16b, v19.16b, v3.16b + st1 {v16.16b-v19.16b}, [x1], #64 + cbz x5, .Lout + + tbnz x6, #63, 1f eor v20.16b, v20.16b, v4.16b eor v21.16b, v21.16b, v5.16b - st1 {v16.16b-v19.16b}, [x1], #64 eor v22.16b, v22.16b, v6.16b eor v23.16b, v23.16b, v7.16b + st1 {v20.16b-v23.16b}, [x1], #64 + cbz x6, .Lout + + tbnz x7, #63, 2f eor v24.16b, v24.16b, v8.16b eor v25.16b, v25.16b, v9.16b - st1 {v20.16b-v23.16b}, [x1], #64 eor v26.16b, v26.16b, v10.16b eor v27.16b, v27.16b, v11.16b - eor v28.16b, v28.16b, v12.16b st1 {v24.16b-v27.16b}, [x1], #64 + cbz x7, .Lout + + tbnz x8, #63, 3f + eor v28.16b, v28.16b, v12.16b eor v29.16b, v29.16b, v13.16b eor v30.16b, v30.16b, v14.16b eor v31.16b, v31.16b, v15.16b st1 {v28.16b-v31.16b}, [x1] +.Lout: frame_pop ret -ENDPROC(chacha20_4block_xor_neon) -CTRINC: .word 0, 1, 2, 3 + // fewer than 128 bytes of in/output +0: ld1 {v8.16b}, [x10] + ld1 {v9.16b}, [x11] + movi v10.16b, #16 + sub x2, x1, #64 + add x1, x1, x5 + ld1 {v16.16b-v19.16b}, [x2] + tbl v4.16b, {v0.16b-v3.16b}, v8.16b + tbx v20.16b, {v16.16b-v19.16b}, v9.16b + add v8.16b, v8.16b, v10.16b + add v9.16b, v9.16b, v10.16b + tbl v5.16b, {v0.16b-v3.16b}, v8.16b + tbx v21.16b, {v16.16b-v19.16b}, v9.16b + add v8.16b, v8.16b, v10.16b + add v9.16b, v9.16b, 
v10.16b + tbl v6.16b, {v0.16b-v3.16b}, v8.16b + tbx v22.16b, {v16.16b-v19.16b}, v9.16b + add v8.16b, v8.16b, v10.16b + add v9.16b, v9.16b, v10.16b + tbl v7.16b, {v0.16b-v3.16b}, v8.16b + tbx v23.16b, {v16.16b-v19.16b}, v9.16b + + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v5.16b + eor v22.16b, v22.16b, v6.16b + eor v23.16b, v23.16b, v7.16b + st1 {v20.16b-v23.16b}, [x1] + b .Lout + + // fewer than 192 bytes of in/output +1: ld1 {v8.16b}, [x10] + ld1 {v9.16b}, [x11] + movi v10.16b, #16 + add x1, x1, x6 + tbl v0.16b, {v4.16b-v7.16b}, v8.16b + tbx v20.16b, {v16.16b-v19.16b}, v9.16b + add v8.16b, v8.16b, v10.16b + add v9.16b, v9.16b, v10.16b + tbl v1.16b, {v4.16b-v7.16b}, v8.16b + tbx v21.16b, {v16.16b-v19.16b}, v9.16b + add v8.16b, v8.16b, v10.16b + add v9.16b, v9.16b, v10.16b + tbl v2.16b, {v4.16b-v7.16b}, v8.16b + tbx v22.16b, {v16.16b-v19.16b}, v9.16b + add v8.16b, v8.16b, v10.16b + add v9.16b, v9.16b, v10.16b + tbl v3.16b, {v4.16b-v7.16b}, v8.16b + tbx v23.16b, {v16.16b-v19.16b}, v9.16b + + eor v20.16b, v20.16b, v0.16b + eor v21.16b, v21.16b, v1.16b + eor v22.16b, v22.16b, v2.16b + eor v23.16b, v23.16b, v3.16b + st1 {v20.16b-v23.16b}, [x1] + b .Lout + + // fewer than 256 bytes of in/output +2: ld1 {v4.16b}, [x10] + ld1 {v5.16b}, [x11] + movi v6.16b, #16 + add x1, x1, x7 + tbl v0.16b, {v8.16b-v11.16b}, v4.16b + tbx v24.16b, {v20.16b-v23.16b}, v5.16b + add v4.16b, v4.16b, v6.16b + add v5.16b, v5.16b, v6.16b + tbl v1.16b, {v8.16b-v11.16b}, v4.16b + tbx v25.16b, {v20.16b-v23.16b}, v5.16b + add v4.16b, v4.16b, v6.16b + add v5.16b, v5.16b, v6.16b + tbl v2.16b, {v8.16b-v11.16b}, v4.16b + tbx v26.16b, {v20.16b-v23.16b}, v5.16b + add v4.16b, v4.16b, v6.16b + add v5.16b, v5.16b, v6.16b + tbl v3.16b, {v8.16b-v11.16b}, v4.16b + tbx v27.16b, {v20.16b-v23.16b}, v5.16b + + eor v24.16b, v24.16b, v0.16b + eor v25.16b, v25.16b, v1.16b + eor v26.16b, v26.16b, v2.16b + eor v27.16b, v27.16b, v3.16b + st1 {v24.16b-v27.16b}, [x1] + b .Lout + + // fewer than 320 bytes of 
in/output +3: ld1 {v4.16b}, [x10] + ld1 {v5.16b}, [x11] + movi v6.16b, #16 + add x1, x1, x8 + tbl v0.16b, {v12.16b-v15.16b}, v4.16b + tbx v28.16b, {v24.16b-v27.16b}, v5.16b + add v4.16b, v4.16b, v6.16b + add v5.16b, v5.16b, v6.16b + tbl v1.16b, {v12.16b-v15.16b}, v4.16b + tbx v29.16b, {v24.16b-v27.16b}, v5.16b + add v4.16b, v4.16b, v6.16b + add v5.16b, v5.16b, v6.16b + tbl v2.16b, {v12.16b-v15.16b}, v4.16b + tbx v30.16b, {v24.16b-v27.16b}, v5.16b + add v4.16b, v4.16b, v6.16b + add v5.16b, v5.16b, v6.16b + tbl v3.16b, {v12.16b-v15.16b}, v4.16b + tbx v31.16b, {v24.16b-v27.16b}, v5.16b + + eor v28.16b, v28.16b, v0.16b + eor v29.16b, v29.16b, v1.16b + eor v30.16b, v30.16b, v2.16b + eor v31.16b, v31.16b, v3.16b + st1 {v28.16b-v31.16b}, [x1] + b .Lout +ENDPROC(chacha_4block_xor_neon) + + .section ".rodata", "a", %progbits + .align L1_CACHE_SHIFT +.Lpermute: + .set .Li, 0 + .rept 192 + .byte (.Li - 64) + .set .Li, .Li + 1 + .endr + +CTRINC: .word 1, 2, 3, 4 ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c new file mode 100644 index 000000000000..bece1d85bd81 --- /dev/null +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -0,0 +1,198 @@ +/* + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016 - 2017 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, + int nrounds, int bytes); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + int bytes, int nrounds) +{ + while (bytes > 0) { + int l = min(bytes, CHACHA_BLOCK_SIZE * 5); + + if (l <= CHACHA_BLOCK_SIZE) { + u8 buf[CHACHA_BLOCK_SIZE]; + + memcpy(buf, src, l); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, l); + state[12] += 1; + break; + } + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= CHACHA_BLOCK_SIZE * 5; + src += CHACHA_BLOCK_SIZE * 5; + dst += CHACHA_BLOCK_SIZE * 5; + state[12] += 5; + } +} + +static int chacha_neon_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + crypto_chacha_init(state, ctx, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = rounddown(nbytes, walk.stride); + + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int chacha_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); + + return chacha_neon_stream_xor(req, ctx, req->iv); +} + +static int xchacha_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); 
+ struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_xchacha_crypt(req); + + crypto_chacha_init(state, ctx, req->iv); + + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_neon_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 5 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha_neon, + .decrypt = chacha_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 5 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 5 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + } +}; + +static int __init 
chacha_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-neon"); diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c deleted file mode 100644 index 727579c93ded..000000000000 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions - * - * Copyright (C) 2016 - 2017 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); - -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 buf[CHACHA20_BLOCK_SIZE]; - - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { - kernel_neon_begin(); - chacha20_4block_xor_neon(state, dst, src); - kernel_neon_end(); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; - state[12] += 4; - } - - if (!bytes) - return; - - kernel_neon_begin(); - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha20_block_xor_neon(state, buf, buf); - memcpy(dst, buf, bytes); - } - kernel_neon_end(); -} - -static int chacha20_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE) - return crypto_chacha20_crypt(req); - - err = skcipher_walk_virt(&walk, req, false); - - crypto_chacha20_init(state, ctx, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_module = 
THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .walksize = 4 * CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_neon, - .decrypt = chacha20_neon, -}; - -static int __init chacha20_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_ASIMD)) - return -ENODEV; - - return crypto_register_skcipher(&alg); -} - -static void __exit chacha20_simd_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S new file mode 100644 index 000000000000..e05570c38de7 --- /dev/null +++ b/arch/arm64/crypto/nh-neon-core.S @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, ARM64 NEON accelerated version + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + + KEY .req x0 + MESSAGE .req x1 + MESSAGE_LEN .req x2 + HASH .req x3 + + PASS0_SUMS .req v0 + PASS1_SUMS .req v1 + PASS2_SUMS .req v2 + PASS3_SUMS .req v3 + K0 .req v4 + K1 .req v5 + K2 .req v6 + K3 .req v7 + T0 .req v8 + T1 .req v9 + T2 .req v10 + T3 .req v11 + T4 .req v12 + T5 .req v13 + T6 .req v14 + T7 .req v15 + +.macro _nh_stride k0, k1, k2, k3 + + // Load next message stride + ld1 {T3.16b}, [MESSAGE], #16 + + // Load next key stride + ld1 {\k3\().4s}, [KEY], #16 + + // Add message words to key words + add T0.4s, T3.4s, \k0\().4s + add T1.4s, T3.4s, \k1\().4s + add T2.4s, T3.4s, \k2\().4s + add T3.4s, T3.4s, \k3\().4s + + // Multiply 32x32 => 64 and accumulate + mov T4.d[0], T0.d[1] + mov T5.d[0], T1.d[1] + mov T6.d[0], T2.d[1] + mov T7.d[0], T3.d[1] + umlal PASS0_SUMS.2d, T0.2s, T4.2s + umlal PASS1_SUMS.2d, T1.2s, T5.2s + umlal PASS2_SUMS.2d, T2.2s, T6.2s + umlal 
PASS3_SUMS.2d, T3.2s, T7.2s +.endm + +/* + * void nh_neon(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +ENTRY(nh_neon) + + ld1 {K0.4s,K1.4s}, [KEY], #32 + movi PASS0_SUMS.2d, #0 + movi PASS1_SUMS.2d, #0 + ld1 {K2.4s}, [KEY], #16 + movi PASS2_SUMS.2d, #0 + movi PASS3_SUMS.2d, #0 + + subs MESSAGE_LEN, MESSAGE_LEN, #64 + blt .Lloop4_done +.Lloop4: + _nh_stride K0, K1, K2, K3 + _nh_stride K1, K2, K3, K0 + _nh_stride K2, K3, K0, K1 + _nh_stride K3, K0, K1, K2 + subs MESSAGE_LEN, MESSAGE_LEN, #64 + bge .Lloop4 + +.Lloop4_done: + ands MESSAGE_LEN, MESSAGE_LEN, #63 + beq .Ldone + _nh_stride K0, K1, K2, K3 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K1, K2, K3, K0 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K2, K3, K0, K1 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d + addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d + st1 {T0.16b,T1.16b}, [HASH] + ret +ENDPROC(nh_neon) diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c new file mode 100644 index 000000000000..22cc32ac9448 --- /dev/null +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (ARM64 NEON accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include + +asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_neon(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_neon_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) 
+{ + if (srclen < 64 || !may_use_simd()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + + kernel_neon_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + kernel_neon_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-neon", + .base.cra_priority = 200, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_neon_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-neon"); diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 812d9498d97b..dd456725189f 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -137,7 +137,7 @@ static int fallback_init_cip(struct crypto_tfm *tfm) struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); sctx->fallback.cip = crypto_alloc_cipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(sctx->fallback.cip)) { pr_err("Allocating AES fallback algorithm %s failed\n", diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 3cd4f6b198b6..a9b8b0b94a8d 100644 --- 
a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -476,11 +476,6 @@ static bool __init sparc64_has_aes_opcode(void) static int __init aes_sparc64_mod_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(algs); i++) - INIT_LIST_HEAD(&algs[i].cra_list); - if (sparc64_has_aes_opcode()) { pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); return crypto_register_algs(algs, ARRAY_SIZE(algs)); diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 561a84d93cf6..900d5c617e83 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -299,11 +299,6 @@ static bool __init sparc64_has_camellia_opcode(void) static int __init camellia_sparc64_mod_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(algs); i++) - INIT_LIST_HEAD(&algs[i].cra_list); - if (sparc64_has_camellia_opcode()) { pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); return crypto_register_algs(algs, ARRAY_SIZE(algs)); diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 61af794aa2d3..56499ea39fd3 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -510,11 +510,6 @@ static bool __init sparc64_has_des_opcode(void) static int __init des_sparc64_mod_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(algs); i++) - INIT_LIST_HEAD(&algs[i].cra_list); - if (sparc64_has_des_opcode()) { pr_info("Using sparc64 des opcodes optimized DES implementation\n"); return crypto_register_algs(algs, ARRAY_SIZE(algs)); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a4b0007a54e1..45734e1cf967 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -8,6 +8,7 @@ OBJECT_FILES_NON_STANDARD := y avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) +avx512_supported :=$(call 
as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no) sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) @@ -23,7 +24,7 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o -obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o +obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o @@ -46,6 +47,9 @@ obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o + # These modules require assembler to support AVX. 
ifeq ($(avx_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \ @@ -74,7 +78,7 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o -chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o +chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o @@ -84,6 +88,8 @@ aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o +nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o + ifeq ($(avx_supported),yes) camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ camellia_aesni_avx_glue.o @@ -97,10 +103,16 @@ endif ifeq ($(avx2_supported),yes) camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o - chacha20-x86_64-y += chacha20-avx2-x86_64.o + chacha-x86_64-y += chacha-avx2-x86_64.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o + + nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o +endif + +ifeq ($(avx512_supported),yes) + chacha-x86_64-y += chacha-avx512vl-x86_64.o endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 1985ea0b551b..91c039ab5699 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -182,43 +182,30 @@ aad_shift_arr: .text -##define the fields of the gcm aes context -#{ -# u8 expanded_keys[16*11] store expanded keys -# u8 shifted_hkey_1[16] store HashKey <<1 mod poly here -# u8 
shifted_hkey_2[16] store HashKey^2 <<1 mod poly here -# u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here -# u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here -# u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here -# u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here -# u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here -# u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here -# u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes) -# u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes) -#} gcm_ctx# +#define AadHash 16*0 +#define AadLen 16*1 +#define InLen (16*1)+8 +#define PBlockEncKey 16*2 +#define OrigIV 16*3 +#define CurCount 16*4 +#define PBlockLen 16*5 -HashKey = 16*11 # store HashKey <<1 mod poly here -HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here -HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here -HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here -HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here -HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here -HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here -HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here -HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) -HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) -HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) 
-HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) -HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) -HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) -HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) -HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) +HashKey = 16*6 # store HashKey <<1 mod poly here +HashKey_2 = 16*7 # store HashKey^2 <<1 mod poly here +HashKey_3 = 16*8 # store HashKey^3 <<1 mod poly here +HashKey_4 = 16*9 # store HashKey^4 <<1 mod poly here +HashKey_5 = 16*10 # store HashKey^5 <<1 mod poly here +HashKey_6 = 16*11 # store HashKey^6 <<1 mod poly here +HashKey_7 = 16*12 # store HashKey^7 <<1 mod poly here +HashKey_8 = 16*13 # store HashKey^8 <<1 mod poly here +HashKey_k = 16*14 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) +HashKey_2_k = 16*15 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) +HashKey_3_k = 16*16 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) +HashKey_4_k = 16*17 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) +HashKey_5_k = 16*18 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) +HashKey_6_k = 16*19 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) +HashKey_7_k = 16*20 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) +HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) #define arg1 %rdi #define arg2 %rsi @@ -229,6 +216,8 @@ HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsu #define arg7 STACK_OFFSET+8*1(%r14) #define arg8 STACK_OFFSET+8*2(%r14) #define arg9 STACK_OFFSET+8*3(%r14) +#define arg10 STACK_OFFSET+8*4(%r14) +#define keysize 2*15*16(arg1) i = 0 j = 0 @@ -267,19 +256,636 @@ VARIABLE_OFFSET = 16*8 # Utility Macros 
################################ -# Encryption of a single block -.macro ENCRYPT_SINGLE_BLOCK XMM0 - vpxor (arg1), \XMM0, \XMM0 - i = 1 - setreg -.rep 9 - vaesenc 16*i(arg1), \XMM0, \XMM0 - i = (i+1) - setreg -.endr - vaesenclast 16*10(arg1), \XMM0, \XMM0 +.macro FUNC_SAVE + #the number of pushes must equal STACK_OFFSET + push %r12 + push %r13 + push %r14 + push %r15 + + mov %rsp, %r14 + + + + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp # align rsp to 64 bytes .endm +.macro FUNC_RESTORE + mov %r14, %rsp + + pop %r15 + pop %r14 + pop %r13 + pop %r12 +.endm + +# Encryption of a single block +.macro ENCRYPT_SINGLE_BLOCK REP XMM0 + vpxor (arg1), \XMM0, \XMM0 + i = 1 + setreg +.rep \REP + vaesenc 16*i(arg1), \XMM0, \XMM0 + i = (i+1) + setreg +.endr + vaesenclast 16*i(arg1), \XMM0, \XMM0 +.endm + +# combined for GCM encrypt and decrypt functions +# clobbering all xmm registers +# clobbering r10, r11, r12, r13, r14, r15 +.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP + vmovdqu AadHash(arg2), %xmm8 + vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey + add arg5, InLen(arg2) + + # initialize the data pointer offset as zero + xor %r11d, %r11d + + PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC + sub %r11, arg5 + + mov arg5, %r13 # save the number of bytes of plaintext/ciphertext + and $-16, %r13 # r13 = r13 - (r13 mod 16) + + mov %r13, %r12 + shr $4, %r12 + and $7, %r12 + jz _initial_num_blocks_is_0\@ + + cmp $7, %r12 + je _initial_num_blocks_is_7\@ + cmp $6, %r12 + je _initial_num_blocks_is_6\@ + cmp $5, %r12 + je _initial_num_blocks_is_5\@ + cmp $4, %r12 + je _initial_num_blocks_is_4\@ + cmp $3, %r12 + je _initial_num_blocks_is_3\@ + cmp $2, %r12 + je _initial_num_blocks_is_2\@ + + jmp _initial_num_blocks_is_1\@ + +_initial_num_blocks_is_7\@: + \INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*7, 
%r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_6\@: + \INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*6, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_5\@: + \INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*5, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_4\@: + \INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*4, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_3\@: + \INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*3, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_2\@: + \INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*2, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_1\@: + \INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + sub $16*1, %r13 + jmp _initial_blocks_encrypted\@ + +_initial_num_blocks_is_0\@: + \INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC + + +_initial_blocks_encrypted\@: + cmp $0, %r13 + je _zero_cipher_left\@ + + sub $128, %r13 + je _eight_cipher_left\@ + + + + + vmovd %xmm9, %r15d + and $255, %r15d + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + + +_encrypt_by_8_new\@: + cmp $(255-8), %r15d + jg _encrypt_by_8\@ + + + + add $8, %r15b + \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, 
%xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC + add $128, %r11 + sub $128, %r13 + jne _encrypt_by_8_new\@ + + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + jmp _eight_cipher_left\@ + +_encrypt_by_8\@: + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + add $8, %r15b + \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + add $128, %r11 + sub $128, %r13 + jne _encrypt_by_8_new\@ + + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + + + + +_eight_cipher_left\@: + \GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 + + +_zero_cipher_left\@: + vmovdqu %xmm14, AadHash(arg2) + vmovdqu %xmm9, CurCount(arg2) + + # check for 0 length + mov arg5, %r13 + and $15, %r13 # r13 = (arg5 mod 16) + + je _multiple_of_16_bytes\@ + + # handle the last <16 Byte block separately + + mov %r13, PBlockLen(arg2) + + vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn + vmovdqu %xmm9, CurCount(arg2) + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + + ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn) + vmovdqu %xmm9, PBlockEncKey(arg2) + + cmp $16, arg5 + jge _large_enough_update\@ + + lea (arg4,%r11,1), %r10 + mov %r13, %r12 + + READ_PARTIAL_BLOCK %r10 %r12 %xmm1 + + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 # adjust the shuffle mask pointer to be + # able to shift 16-r13 bytes (r13 is the + # number of bytes in plaintext mod 16) + + jmp _final_ghash_mul\@ + +_large_enough_update\@: + sub $16, %r11 + add %r13, %r11 + + # receive the last <16 Byte block + vmovdqu (arg4, %r11, 1), %xmm1 + + sub %r13, %r11 + add $16, %r11 + + lea SHIFT_MASK+16(%rip), %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (r13 is the number of bytes in plaintext mod 16) + sub %r13, %r12 + # get the appropriate shuffle mask 
+ vmovdqu (%r12), %xmm2 + # shift right 16-r13 bytes + vpshufb %xmm2, %xmm1, %xmm1 + +_final_ghash_mul\@: + .if \ENC_DEC == DEC + vmovdqa %xmm1, %xmm2 + vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to + # mask out top 16-r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 + vpand %xmm1, %xmm2, %xmm2 + vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 + vpxor %xmm2, %xmm14, %xmm14 + + vmovdqu %xmm14, AadHash(arg2) + .else + vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to + # mask out top 16-r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 + vpxor %xmm9, %xmm14, %xmm14 + + vmovdqu %xmm14, AadHash(arg2) + vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext + .endif + + + ############################# + # output r13 Bytes + vmovq %xmm9, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left\@ + + mov %rax, (arg3 , %r11) + add $8, %r11 + vpsrldq $8, %xmm9, %xmm9 + vmovq %xmm9, %rax + sub $8, %r13 + +_less_than_8_bytes_left\@: + movb %al, (arg3 , %r11) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left\@ + ############################# + +_multiple_of_16_bytes\@: +.endm + + +# GCM_COMPLETE Finishes update of tag of last partial block +# Output: Authorization Tag (AUTH_TAG) +# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 +.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN + vmovdqu AadHash(arg2), %xmm14 + vmovdqu HashKey(arg2), %xmm13 + + mov PBlockLen(arg2), %r12 + cmp $0, %r12 + je _partial_done\@ + + #GHASH computation for the last <16 Byte block + \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + +_partial_done\@: + mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + vmovd %r12d, %xmm15 # len(A) in xmm15 + + mov 
InLen(arg2), %r12 + shl $3, %r12 # len(C) in bits (*128) + vmovq %r12, %xmm1 + vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 + vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) + + vpxor %xmm15, %xmm14, %xmm14 + \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation + vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap + + vmovdqu OrigIV(arg2), %xmm9 + + ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Y0) + + vpxor %xmm14, %xmm9, %xmm9 + + + +_return_T\@: + mov \AUTH_TAG, %r10 # r10 = authTag + mov \AUTH_TAG_LEN, %r11 # r11 = auth_tag_len + + cmp $16, %r11 + je _T_16\@ + + cmp $8, %r11 + jl _T_4\@ + +_T_8\@: + vmovq %xmm9, %rax + mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 + vpsrldq $8, %xmm9, %xmm9 + cmp $0, %r11 + je _return_T_done\@ +_T_4\@: + vmovd %xmm9, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + vpsrldq $4, %xmm9, %xmm9 + cmp $0, %r11 + je _return_T_done\@ +_T_123\@: + vmovd %xmm9, %eax + cmp $2, %r11 + jl _T_1\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done\@ + add $2, %r10 + sar $16, %eax +_T_1\@: + mov %al, (%r10) + jmp _return_T_done\@ + +_T_16\@: + vmovdqu %xmm9, (%r10) + +_return_T_done\@: +.endm + +.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8 + + mov \AAD, %r10 # r10 = AAD + mov \AADLEN, %r12 # r12 = aadLen + + + mov %r12, %r11 + + vpxor \T8, \T8, \T8 + vpxor \T7, \T7, \T7 + cmp $16, %r11 + jl _get_AAD_rest8\@ +_get_AAD_blocks\@: + vmovdqu (%r10), \T7 + vpshufb SHUF_MASK(%rip), \T7, \T7 + vpxor \T7, \T8, \T8 + \GHASH_MUL \T8, \T2, \T1, \T3, \T4, \T5, \T6 + add $16, %r10 + sub $16, %r12 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\@ + vmovdqu \T8, \T7 + cmp $0, %r11 + je _get_AAD_done\@ + + vpxor \T7, \T7, \T7 + + /* read the last <16B of AAD. 
since we have at least 4B of + data right after the AAD (the ICV, and maybe some CT), we can + read 4B/8B blocks safely, and then get rid of the extra stuff */ +_get_AAD_rest8\@: + cmp $4, %r11 + jle _get_AAD_rest4\@ + movq (%r10), \T1 + add $8, %r10 + sub $8, %r11 + vpslldq $8, \T1, \T1 + vpsrldq $8, \T7, \T7 + vpxor \T1, \T7, \T7 + jmp _get_AAD_rest8\@ +_get_AAD_rest4\@: + cmp $0, %r11 + jle _get_AAD_rest0\@ + mov (%r10), %eax + movq %rax, \T1 + add $4, %r10 + sub $4, %r11 + vpslldq $12, \T1, \T1 + vpsrldq $4, \T7, \T7 + vpxor \T1, \T7, \T7 +_get_AAD_rest0\@: + /* finalize: shift out the extra bytes we read, and align + left. since pslldq can only shift by an immediate, we use + vpshufb and an array of shuffle masks */ + movq %r12, %r11 + salq $4, %r11 + vmovdqu aad_shift_arr(%r11), \T1 + vpshufb \T1, \T7, \T7 +_get_AAD_rest_final\@: + vpshufb SHUF_MASK(%rip), \T7, \T7 + vpxor \T8, \T7, \T7 + \GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6 + +_get_AAD_done\@: + vmovdqu \T7, AadHash(arg2) +.endm + +.macro INIT GHASH_MUL PRECOMPUTE + mov arg6, %r11 + mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length + xor %r11d, %r11d + mov %r11, InLen(arg2) # ctx_data.in_length = 0 + + mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0 + mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0 + mov arg3, %rax + movdqu (%rax), %xmm0 + movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv + + vpshufb SHUF_MASK(%rip), %xmm0, %xmm0 + movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv + + vmovdqu (arg4), %xmm6 # xmm6 = HashKey + + vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 + ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey + vmovdqa %xmm6, %xmm2 + vpsllq $1, %xmm6, %xmm6 + vpsrlq $63, %xmm2, %xmm2 + vmovdqa %xmm2, %xmm1 + vpslldq $8, %xmm2, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + #reduction + vpshufd $0b00100100, %xmm1, %xmm2 + vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 + vpand POLY(%rip), %xmm2, %xmm2 + vpxor %xmm2, 
%xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly + ####################################################################### + vmovdqu %xmm6, HashKey(arg2) # store HashKey<<1 mod poly + + CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0 + + \PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 +.endm + + +# Reads DLEN bytes starting at DPTR and stores in XMMDst +# where 0 < DLEN < 16 +# Clobbers %rax, DLEN +.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst + vpxor \XMMDst, \XMMDst, \XMMDst + + cmp $8, \DLEN + jl _read_lt8_\@ + mov (\DPTR), %rax + vpinsrq $0, %rax, \XMMDst, \XMMDst + sub $8, \DLEN + jz _done_read_partial_block_\@ + xor %eax, %eax +_read_next_byte_\@: + shl $8, %rax + mov 7(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_\@ + vpinsrq $1, %rax, \XMMDst, \XMMDst + jmp _done_read_partial_block_\@ +_read_lt8_\@: + xor %eax, %eax +_read_next_byte_lt8_\@: + shl $8, %rax + mov -1(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_lt8_\@ + vpinsrq $0, %rax, \XMMDst, \XMMDst +_done_read_partial_block_\@: +.endm + +# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks +# between update calls. 
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK +# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context +# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 +.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH ENC_DEC + mov PBlockLen(arg2), %r13 + cmp $0, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN + jl _fewer_than_16_bytes_\@ + vmovdqu (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm + jmp _data_read_\@ + +_fewer_than_16_bytes_\@: + lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 + mov \PLAIN_CYPH_LEN, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm1 + + mov PBlockLen(arg2), %r13 + +_data_read_\@: # Finished reading in data + + vmovdqu PBlockEncKey(arg2), %xmm9 + vmovdqu HashKey(arg2), %xmm13 + + lea SHIFT_MASK(%rip), %r12 + + # adjust the shuffle mask pointer to be able to shift r13 bytes + # r16-r13 is the number of bytes in plaintext mod 16) + add %r13, %r12 + vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask + vpshufb %xmm2, %xmm9, %xmm9 # shift right r13 bytes + +.if \ENC_DEC == DEC + vmovdqa %xmm1, %xmm3 + pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_1_\@ + sub %r10, %r12 +_no_extra_mask_1_\@: + + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 # mask out bottom r13 bytes of xmm9 + + vpand %xmm1, %xmm3, %xmm3 + vmovdqa SHUF_MASK(%rip), %xmm10 + vpshufb %xmm10, %xmm3, %xmm3 + vpshufb %xmm2, %xmm3, %xmm3 + vpxor %xmm3, \AAD_HASH, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block + 
\GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %eax,%eax + + mov %rax, PBlockLen(arg2) + jmp _dec_done_\@ +_partial_incomplete_1_\@: + add \PLAIN_CYPH_LEN, PBlockLen(arg2) +_dec_done_\@: + vmovdqu \AAD_HASH, AadHash(arg2) +.else + vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_2_\@ + sub %r10, %r12 +_no_extra_mask_2_\@: + + vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + vpand %xmm1, %xmm9, %xmm9 + + vmovdqa SHUF_MASK(%rip), %xmm1 + vpshufb %xmm1, %xmm9, %xmm9 + vpshufb %xmm2, %xmm9, %xmm9 + vpxor %xmm9, \AAD_HASH, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block + \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %eax,%eax + + mov %rax, PBlockLen(arg2) + jmp _encode_done_\@ +_partial_incomplete_2_\@: + add \PLAIN_CYPH_LEN, PBlockLen(arg2) +_encode_done_\@: + vmovdqu \AAD_HASH, AadHash(arg2) + + vmovdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm9 back to output as ciphertext + vpshufb %xmm10, %xmm9, %xmm9 + vpshufb %xmm2, %xmm9, %xmm9 +.endif + # output encrypted Bytes + cmp $0, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 + # Set r13 to be the number of bytes to write out + sub %r12, %r13 + jmp _count_set_\@ +_partial_fill_\@: + mov \PLAIN_CYPH_LEN, %r13 +_count_set_\@: + vmovdqa %xmm9, %xmm0 + vmovq %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + + mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $8, \DATA_OFFSET + psrldq $8, %xmm0 + vmovq %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $1, \DATA_OFFSET + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ 
+_partial_block_done_\@: +.endm # PARTIAL_BLOCK + #ifdef CONFIG_AS_AVX ############################################################################### # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) @@ -341,49 +947,49 @@ VARIABLE_OFFSET = 16*8 vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_k(arg1) + vmovdqu \T1, HashKey_k(arg2) GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly - vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly + vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_2_k(arg1) + vmovdqu \T1, HashKey_2_k(arg2) GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly - vmovdqa \T5, HashKey_3(arg1) + vmovdqu \T5, HashKey_3(arg2) vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_3_k(arg1) + vmovdqu \T1, HashKey_3_k(arg2) GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly - vmovdqa \T5, HashKey_4(arg1) + vmovdqu \T5, HashKey_4(arg2) vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_4_k(arg1) + vmovdqu \T1, HashKey_4_k(arg2) GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly - vmovdqa \T5, HashKey_5(arg1) + vmovdqu \T5, HashKey_5(arg2) vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_5_k(arg1) + vmovdqu \T1, HashKey_5_k(arg2) GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly - vmovdqa \T5, HashKey_6(arg1) + vmovdqu \T5, HashKey_6(arg2) vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_6_k(arg1) + vmovdqu \T1, HashKey_6_k(arg2) GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly - vmovdqa \T5, HashKey_7(arg1) + vmovdqu \T5, HashKey_7(arg2) vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_7_k(arg1) + vmovdqu \T1, HashKey_7_k(arg2) GHASH_MUL_AVX 
\T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly - vmovdqa \T5, HashKey_8(arg1) + vmovdqu \T5, HashKey_8(arg2) vpshufd $0b01001110, \T5, \T1 vpxor \T5, \T1, \T1 - vmovdqa \T1, HashKey_8_k(arg1) + vmovdqu \T1, HashKey_8_k(arg2) .endm @@ -392,84 +998,15 @@ VARIABLE_OFFSET = 16*8 ## num_initial_blocks = b mod 4# ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext ## r10, r11, r12, rax are clobbered -## arg1, arg2, arg3, r14 are used as a pointer only, not modified +## arg1, arg3, arg4, r14 are used as a pointer only, not modified -.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC +.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC i = (8-\num_initial_blocks) - j = 0 setreg - - mov arg6, %r10 # r10 = AAD - mov arg7, %r12 # r12 = aadLen - - - mov %r12, %r11 - - vpxor reg_j, reg_j, reg_j - vpxor reg_i, reg_i, reg_i - cmp $16, %r11 - jl _get_AAD_rest8\@ -_get_AAD_blocks\@: - vmovdqu (%r10), reg_i - vpshufb SHUF_MASK(%rip), reg_i, reg_i - vpxor reg_i, reg_j, reg_j - GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 - add $16, %r10 - sub $16, %r12 - sub $16, %r11 - cmp $16, %r11 - jge _get_AAD_blocks\@ - vmovdqu reg_j, reg_i - cmp $0, %r11 - je _get_AAD_done\@ - - vpxor reg_i, reg_i, reg_i - - /* read the last <16B of AAD. 
since we have at least 4B of - data right after the AAD (the ICV, and maybe some CT), we can - read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\@: - cmp $4, %r11 - jle _get_AAD_rest4\@ - movq (%r10), \T1 - add $8, %r10 - sub $8, %r11 - vpslldq $8, \T1, \T1 - vpsrldq $8, reg_i, reg_i - vpxor \T1, reg_i, reg_i - jmp _get_AAD_rest8\@ -_get_AAD_rest4\@: - cmp $0, %r11 - jle _get_AAD_rest0\@ - mov (%r10), %eax - movq %rax, \T1 - add $4, %r10 - sub $4, %r11 - vpslldq $12, \T1, \T1 - vpsrldq $4, reg_i, reg_i - vpxor \T1, reg_i, reg_i -_get_AAD_rest0\@: - /* finalize: shift out the extra bytes we read, and align - left. since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - movdqu aad_shift_arr(%r11), \T1 - vpshufb \T1, reg_i, reg_i -_get_AAD_rest_final\@: - vpshufb SHUF_MASK(%rip), reg_i, reg_i - vpxor reg_j, reg_i, reg_i - GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 - -_get_AAD_done\@: - # initialize the data pointer offset as zero - xor %r11d, %r11d + vmovdqu AadHash(arg2), reg_i # start AES for num_initial_blocks blocks - mov arg5, %rax # rax = *Y0 - vmovdqu (%rax), \CTR # CTR = Y0 - vpshufb SHUF_MASK(%rip), \CTR, \CTR - + vmovdqu CurCount(arg2), \CTR i = (9-\num_initial_blocks) setreg @@ -490,10 +1027,10 @@ _get_AAD_done\@: setreg .endr - j = 1 - setreg -.rep 9 - vmovdqa 16*j(arg1), \T_key + j = 1 + setreg +.rep \REP + vmovdqa 16*j(arg1), \T_key i = (9-\num_initial_blocks) setreg .rep \num_initial_blocks @@ -502,12 +1039,11 @@ _get_AAD_done\@: setreg .endr - j = (j+1) - setreg + j = (j+1) + setreg .endr - - vmovdqa 16*10(arg1), \T_key + vmovdqa 16*j(arg1), \T_key i = (9-\num_initial_blocks) setreg .rep \num_initial_blocks @@ -519,9 +1055,9 @@ _get_AAD_done\@: i = (9-\num_initial_blocks) setreg .rep \num_initial_blocks - vmovdqu (arg3, %r11), \T1 + vmovdqu (arg4, %r11), \T1 vpxor \T1, reg_i, reg_i - vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for 
num_initial_blocks blocks + vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for num_initial_blocks blocks add $16, %r11 .if \ENC_DEC == DEC vmovdqa \T1, reg_i @@ -595,9 +1131,9 @@ _get_AAD_done\@: vpxor \T_key, \XMM7, \XMM7 vpxor \T_key, \XMM8, \XMM8 - i = 1 - setreg -.rep 9 # do 9 rounds + i = 1 + setreg +.rep \REP # do REP rounds vmovdqa 16*i(arg1), \T_key vaesenc \T_key, \XMM1, \XMM1 vaesenc \T_key, \XMM2, \XMM2 @@ -607,11 +1143,10 @@ _get_AAD_done\@: vaesenc \T_key, \XMM6, \XMM6 vaesenc \T_key, \XMM7, \XMM7 vaesenc \T_key, \XMM8, \XMM8 - i = (i+1) - setreg + i = (i+1) + setreg .endr - vmovdqa 16*i(arg1), \T_key vaesenclast \T_key, \XMM1, \XMM1 vaesenclast \T_key, \XMM2, \XMM2 @@ -622,58 +1157,58 @@ _get_AAD_done\@: vaesenclast \T_key, \XMM7, \XMM7 vaesenclast \T_key, \XMM8, \XMM8 - vmovdqu (arg3, %r11), \T1 + vmovdqu (arg4, %r11), \T1 vpxor \T1, \XMM1, \XMM1 - vmovdqu \XMM1, (arg2 , %r11) + vmovdqu \XMM1, (arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM1 .endif - vmovdqu 16*1(arg3, %r11), \T1 + vmovdqu 16*1(arg4, %r11), \T1 vpxor \T1, \XMM2, \XMM2 - vmovdqu \XMM2, 16*1(arg2 , %r11) + vmovdqu \XMM2, 16*1(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM2 .endif - vmovdqu 16*2(arg3, %r11), \T1 + vmovdqu 16*2(arg4, %r11), \T1 vpxor \T1, \XMM3, \XMM3 - vmovdqu \XMM3, 16*2(arg2 , %r11) + vmovdqu \XMM3, 16*2(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM3 .endif - vmovdqu 16*3(arg3, %r11), \T1 + vmovdqu 16*3(arg4, %r11), \T1 vpxor \T1, \XMM4, \XMM4 - vmovdqu \XMM4, 16*3(arg2 , %r11) + vmovdqu \XMM4, 16*3(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM4 .endif - vmovdqu 16*4(arg3, %r11), \T1 + vmovdqu 16*4(arg4, %r11), \T1 vpxor \T1, \XMM5, \XMM5 - vmovdqu \XMM5, 16*4(arg2 , %r11) + vmovdqu \XMM5, 16*4(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM5 .endif - vmovdqu 16*5(arg3, %r11), \T1 + vmovdqu 16*5(arg4, %r11), \T1 vpxor \T1, \XMM6, \XMM6 - vmovdqu \XMM6, 16*5(arg2 , %r11) + vmovdqu \XMM6, 16*5(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, 
\XMM6 .endif - vmovdqu 16*6(arg3, %r11), \T1 + vmovdqu 16*6(arg4, %r11), \T1 vpxor \T1, \XMM7, \XMM7 - vmovdqu \XMM7, 16*6(arg2 , %r11) + vmovdqu \XMM7, 16*6(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM7 .endif - vmovdqu 16*7(arg3, %r11), \T1 + vmovdqu 16*7(arg4, %r11), \T1 vpxor \T1, \XMM8, \XMM8 - vmovdqu \XMM8, 16*7(arg2 , %r11) + vmovdqu \XMM8, 16*7(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM8 .endif @@ -698,9 +1233,9 @@ _initial_blocks_done\@: # encrypt 8 blocks at a time # ghash the 8 previously encrypted ciphertext blocks -# arg1, arg2, arg3 are used as pointers only, not modified +# arg1, arg3, arg4 are used as pointers only, not modified # r11 is the data offset value -.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC +.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC vmovdqa \XMM1, \T2 vmovdqa \XMM2, TMP2(%rsp) @@ -784,14 +1319,14 @@ _initial_blocks_done\@: ####################################################################### - vmovdqa HashKey_8(arg1), \T5 + vmovdqu HashKey_8(arg2), \T5 vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 vpshufd $0b01001110, \T2, \T6 vpxor \T2, \T6, \T6 - vmovdqa HashKey_8_k(arg1), \T5 + vmovdqu HashKey_8_k(arg2), \T5 vpclmulqdq $0x00, \T5, \T6, \T6 vmovdqu 16*3(arg1), \T1 @@ -805,7 +1340,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP2(%rsp), \T1 - vmovdqa HashKey_7(arg1), \T5 + vmovdqu HashKey_7(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 vpclmulqdq $0x00, \T5, \T1, \T3 @@ -813,7 +1348,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_7_k(arg1), \T5 + vmovdqu HashKey_7_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -830,7 +1365,7 @@ _initial_blocks_done\@: 
####################################################################### vmovdqa TMP3(%rsp), \T1 - vmovdqa HashKey_6(arg1), \T5 + vmovdqu HashKey_6(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 vpclmulqdq $0x00, \T5, \T1, \T3 @@ -838,7 +1373,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_6_k(arg1), \T5 + vmovdqu HashKey_6_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -853,7 +1388,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP4(%rsp), \T1 - vmovdqa HashKey_5(arg1), \T5 + vmovdqu HashKey_5(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 vpclmulqdq $0x00, \T5, \T1, \T3 @@ -861,7 +1396,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_5_k(arg1), \T5 + vmovdqu HashKey_5_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -877,7 +1412,7 @@ _initial_blocks_done\@: vmovdqa TMP5(%rsp), \T1 - vmovdqa HashKey_4(arg1), \T5 + vmovdqu HashKey_4(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 vpclmulqdq $0x00, \T5, \T1, \T3 @@ -885,7 +1420,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_4_k(arg1), \T5 + vmovdqu HashKey_4_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -900,7 +1435,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP6(%rsp), \T1 - vmovdqa HashKey_3(arg1), \T5 + vmovdqu HashKey_3(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 vpclmulqdq $0x00, \T5, \T1, \T3 @@ -908,7 +1443,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_3_k(arg1), \T5 + vmovdqu HashKey_3_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -924,7 +1459,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP7(%rsp), \T1 - vmovdqa HashKey_2(arg1), \T5 + vmovdqu HashKey_2(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 
vpclmulqdq $0x00, \T5, \T1, \T3 @@ -932,7 +1467,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_2_k(arg1), \T5 + vmovdqu HashKey_2_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -949,7 +1484,7 @@ _initial_blocks_done\@: vaesenc \T5, \XMM8, \XMM8 vmovdqa TMP8(%rsp), \T1 - vmovdqa HashKey(arg1), \T5 + vmovdqu HashKey(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 vpclmulqdq $0x00, \T5, \T1, \T3 @@ -957,7 +1492,7 @@ _initial_blocks_done\@: vpshufd $0b01001110, \T1, \T3 vpxor \T1, \T3, \T3 - vmovdqa HashKey_k(arg1), \T5 + vmovdqu HashKey_k(arg2), \T5 vpclmulqdq $0x10, \T5, \T3, \T3 vpxor \T3, \T6, \T6 @@ -966,17 +1501,35 @@ _initial_blocks_done\@: vmovdqu 16*10(arg1), \T5 + i = 11 + setreg +.rep (\REP-9) + + vaesenc \T5, \XMM1, \XMM1 + vaesenc \T5, \XMM2, \XMM2 + vaesenc \T5, \XMM3, \XMM3 + vaesenc \T5, \XMM4, \XMM4 + vaesenc \T5, \XMM5, \XMM5 + vaesenc \T5, \XMM6, \XMM6 + vaesenc \T5, \XMM7, \XMM7 + vaesenc \T5, \XMM8, \XMM8 + + vmovdqu 16*i(arg1), \T5 + i = i + 1 + setreg +.endr + i = 0 j = 1 setreg .rep 8 - vpxor 16*i(arg3, %r11), \T5, \T2 + vpxor 16*i(arg4, %r11), \T5, \T2 .if \ENC_DEC == ENC vaesenclast \T2, reg_j, reg_j .else vaesenclast \T2, reg_j, \T3 - vmovdqu 16*i(arg3, %r11), reg_j - vmovdqu \T3, 16*i(arg2, %r11) + vmovdqu 16*i(arg4, %r11), reg_j + vmovdqu \T3, 16*i(arg3, %r11) .endif i = (i+1) j = (j+1) @@ -1008,14 +1561,14 @@ _initial_blocks_done\@: vpxor \T2, \T7, \T7 # first phase of the reduction complete ####################################################################### .if \ENC_DEC == ENC - vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the 
Ciphertext buffer - vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer .endif ####################################################################### @@ -1056,25 +1609,25 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM1, \T2 vpxor \XMM1, \T2, \T2 - vmovdqa HashKey_8(arg1), \T5 + vmovdqu HashKey_8(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM1, \T6 vpclmulqdq $0x00, \T5, \XMM1, \T7 - vmovdqa HashKey_8_k(arg1), \T3 + vmovdqu HashKey_8_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \XMM1 ###################### vpshufd $0b01001110, \XMM2, \T2 vpxor \XMM2, \T2, \T2 - vmovdqa HashKey_7(arg1), \T5 + vmovdqu HashKey_7(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM2, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM2, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_7_k(arg1), \T3 + vmovdqu HashKey_7_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1082,14 +1635,14 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM3, \T2 vpxor \XMM3, \T2, \T2 - vmovdqa HashKey_6(arg1), \T5 + vmovdqu HashKey_6(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM3, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM3, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_6_k(arg1), \T3 + vmovdqu HashKey_6_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1097,14 +1650,14 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM4, \T2 vpxor \XMM4, \T2, \T2 - vmovdqa HashKey_5(arg1), \T5 + vmovdqu 
HashKey_5(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM4, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM4, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_5_k(arg1), \T3 + vmovdqu HashKey_5_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1112,14 +1665,14 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM5, \T2 vpxor \XMM5, \T2, \T2 - vmovdqa HashKey_4(arg1), \T5 + vmovdqu HashKey_4(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM5, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM5, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_4_k(arg1), \T3 + vmovdqu HashKey_4_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1127,14 +1680,14 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM6, \T2 vpxor \XMM6, \T2, \T2 - vmovdqa HashKey_3(arg1), \T5 + vmovdqu HashKey_3(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM6, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM6, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_3_k(arg1), \T3 + vmovdqu HashKey_3_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1142,14 +1695,14 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM7, \T2 vpxor \XMM7, \T2, \T2 - vmovdqa HashKey_2(arg1), \T5 + vmovdqu HashKey_2(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM7, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM7, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_2_k(arg1), \T3 + vmovdqu HashKey_2_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1157,14 +1710,14 @@ _initial_blocks_done\@: vpshufd $0b01001110, \XMM8, \T2 vpxor \XMM8, \T2, \T2 - vmovdqa HashKey(arg1), \T5 + vmovdqu HashKey(arg2), \T5 vpclmulqdq $0x11, \T5, \XMM8, \T4 vpxor \T4, \T6, \T6 vpclmulqdq $0x00, \T5, \XMM8, \T4 vpxor \T4, \T7, \T7 - vmovdqa HashKey_k(arg1), \T3 + vmovdqu HashKey_k(arg2), \T3 vpclmulqdq $0x00, \T3, \T2, \T2 vpxor \T2, \XMM1, \XMM1 @@ -1210,413 +1763,112 @@ _initial_blocks_done\@: .endm - -# combined for GCM encrypt and decrypt functions -# clobbering all xmm registers -# clobbering r10, r11, r12, r13, r14, 
r15 -.macro GCM_ENC_DEC_AVX ENC_DEC - - #the number of pushes must equal STACK_OFFSET - push %r12 - push %r13 - push %r14 - push %r15 - - mov %rsp, %r14 - - - - - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - - - vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey - - mov arg4, %r13 # save the number of bytes of plaintext/ciphertext - and $-16, %r13 # r13 = r13 - (r13 mod 16) - - mov %r13, %r12 - shr $4, %r12 - and $7, %r12 - jz _initial_num_blocks_is_0\@ - - cmp $7, %r12 - je _initial_num_blocks_is_7\@ - cmp $6, %r12 - je _initial_num_blocks_is_6\@ - cmp $5, %r12 - je _initial_num_blocks_is_5\@ - cmp $4, %r12 - je _initial_num_blocks_is_4\@ - cmp $3, %r12 - je _initial_num_blocks_is_3\@ - cmp $2, %r12 - je _initial_num_blocks_is_2\@ - - jmp _initial_num_blocks_is_1\@ - -_initial_num_blocks_is_7\@: - INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*7, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_6\@: - INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*6, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_5\@: - INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*5, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_4\@: - INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*4, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_3\@: - INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*3, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_2\@: 
- INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*2, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_1\@: - INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*1, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_0\@: - INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - - -_initial_blocks_encrypted\@: - cmp $0, %r13 - je _zero_cipher_left\@ - - sub $128, %r13 - je _eight_cipher_left\@ - - - - - vmovd %xmm9, %r15d - and $255, %r15d - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - - -_encrypt_by_8_new\@: - cmp $(255-8), %r15d - jg _encrypt_by_8\@ - - - - add $8, %r15b - GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC - add $128, %r11 - sub $128, %r13 - jne _encrypt_by_8_new\@ - - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - jmp _eight_cipher_left\@ - -_encrypt_by_8\@: - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - add $8, %r15b - GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - add $128, %r11 - sub $128, %r13 - jne _encrypt_by_8_new\@ - - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - - - - -_eight_cipher_left\@: - GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 - - -_zero_cipher_left\@: - cmp $16, arg4 - jl _only_less_than_16\@ - - mov arg4, %r13 - and $15, %r13 # r13 = (arg4 mod 16) - - je _multiple_of_16_bytes\@ - - # handle the last <16 Byte block seperately - - - vpaddd ONE(%rip), %xmm9, 
%xmm9 # INCR CNT to get Yn - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) - - sub $16, %r11 - add %r13, %r11 - vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block - - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 # adjust the shuffle mask pointer to be - # able to shift 16-r13 bytes (r13 is the - # number of bytes in plaintext mod 16) - vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask - vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes - jmp _final_ghash_mul\@ - -_only_less_than_16\@: - # check for 0 length - mov arg4, %r13 - and $15, %r13 # r13 = (arg4 mod 16) - - je _multiple_of_16_bytes\@ - - # handle the last <16 Byte block seperately - - - vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) - - - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 # adjust the shuffle mask pointer to be - # able to shift 16-r13 bytes (r13 is the - # number of bytes in plaintext mod 16) - -_get_last_16_byte_loop\@: - movb (arg3, %r11), %al - movb %al, TMP1 (%rsp , %r11) - add $1, %r11 - cmp %r13, %r11 - jne _get_last_16_byte_loop\@ - - vmovdqu TMP1(%rsp), %xmm1 - - sub $16, %r11 - -_final_ghash_mul\@: - .if \ENC_DEC == DEC - vmovdqa %xmm1, %xmm2 - vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) - vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to - # mask out top 16-r13 bytes of xmm9 - vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 - vpand %xmm1, %xmm2, %xmm2 - vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 - vpxor %xmm2, %xmm14, %xmm14 - #GHASH computation for the last <16 Byte block - GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 - sub %r13, %r11 - add $16, %r11 - .else - vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) - vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to - # mask out top 16-r13 bytes of xmm9 - vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 - vpshufb 
SHUF_MASK(%rip), %xmm9, %xmm9 - vpxor %xmm9, %xmm14, %xmm14 - #GHASH computation for the last <16 Byte block - GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 - sub %r13, %r11 - add $16, %r11 - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext - .endif - - - ############################# - # output r13 Bytes - vmovq %xmm9, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left\@ - - mov %rax, (arg2 , %r11) - add $8, %r11 - vpsrldq $8, %xmm9, %xmm9 - vmovq %xmm9, %rax - sub $8, %r13 - -_less_than_8_bytes_left\@: - movb %al, (arg2 , %r11) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left\@ - ############################# - -_multiple_of_16_bytes\@: - mov arg7, %r12 # r12 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - vmovd %r12d, %xmm15 # len(A) in xmm15 - - shl $3, arg4 # len(C) in bits (*128) - vmovq arg4, %xmm1 - vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 - vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) - - vpxor %xmm15, %xmm14, %xmm14 - GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation - vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap - - mov arg5, %rax # rax = *Y0 - vmovdqu (%rax), %xmm9 # xmm9 = Y0 - - ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) - - vpxor %xmm14, %xmm9, %xmm9 - - - -_return_T\@: - mov arg8, %r10 # r10 = authTag - mov arg9, %r11 # r11 = auth_tag_len - - cmp $16, %r11 - je _T_16\@ - - cmp $8, %r11 - jl _T_4\@ - -_T_8\@: - vmovq %xmm9, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - vpsrldq $8, %xmm9, %xmm9 - cmp $0, %r11 - je _return_T_done\@ -_T_4\@: - vmovd %xmm9, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - vpsrldq $4, %xmm9, %xmm9 - cmp $0, %r11 - je _return_T_done\@ -_T_123\@: - vmovd %xmm9, %eax - cmp $2, %r11 - jl _T_1\@ - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done\@ - add $2, %r10 - sar $16, %eax -_T_1\@: - mov %al, (%r10) - jmp 
_return_T_done\@ - -_T_16\@: - vmovdqu %xmm9, (%r10) - -_return_T_done\@: - mov %r14, %rsp - - pop %r15 - pop %r14 - pop %r13 - pop %r12 -.endm - - ############################################################# #void aesni_gcm_precomp_avx_gen2 # (gcm_data *my_ctx_data, -# u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ +# gcm_context_data *data, +# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ +# u8 *iv, /* Pre-counter block j0: 4 byte salt +# (from Security Association) concatenated with 8 byte +# Initialisation Vector (from IPSec ESP Payload) +# concatenated with 0x00000001. 16-byte aligned pointer. */ +# const u8 *aad, /* Additional Authentication Data (AAD)*/ +# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ ############################################################# -ENTRY(aesni_gcm_precomp_avx_gen2) - #the number of pushes must equal STACK_OFFSET - push %r12 - push %r13 - push %r14 - push %r15 - - mov %rsp, %r14 - - - - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - - vmovdqu (arg2), %xmm6 # xmm6 = HashKey - - vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 - ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey - vmovdqa %xmm6, %xmm2 - vpsllq $1, %xmm6, %xmm6 - vpsrlq $63, %xmm2, %xmm2 - vmovdqa %xmm2, %xmm1 - vpslldq $8, %xmm2, %xmm2 - vpsrldq $8, %xmm1, %xmm1 - vpor %xmm2, %xmm6, %xmm6 - #reduction - vpshufd $0b00100100, %xmm1, %xmm2 - vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 - vpand POLY(%rip), %xmm2, %xmm2 - vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly - ####################################################################### - vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly - - - PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 - - mov %r14, %rsp - - pop %r15 - pop %r14 - pop %r13 - pop %r12 +ENTRY(aesni_gcm_init_avx_gen2) + FUNC_SAVE + INIT GHASH_MUL_AVX, PRECOMPUTE_AVX 
+ FUNC_RESTORE ret -ENDPROC(aesni_gcm_precomp_avx_gen2) +ENDPROC(aesni_gcm_init_avx_gen2) ############################################################################### -#void aesni_gcm_enc_avx_gen2( +#void aesni_gcm_enc_update_avx_gen2( # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ # const u8 *in, /* Plaintext input */ -# u64 plaintext_len, /* Length of data in Bytes for encryption. */ -# u8 *iv, /* Pre-counter block j0: 4 byte salt -# (from Security Association) concatenated with 8 byte -# Initialisation Vector (from IPSec ESP Payload) -# concatenated with 0x00000001. 16-byte aligned pointer. */ -# const u8 *aad, /* Additional Authentication Data (AAD)*/ -# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ -# u8 *auth_tag, /* Authenticated Tag output. */ -# u64 auth_tag_len)# /* Authenticated Tag Length in bytes. -# Valid values are 16 (most likely), 12 or 8. */ +# u64 plaintext_len) /* Length of data in Bytes for encryption. 
*/ ############################################################################### -ENTRY(aesni_gcm_enc_avx_gen2) - GCM_ENC_DEC_AVX ENC - ret -ENDPROC(aesni_gcm_enc_avx_gen2) +ENTRY(aesni_gcm_enc_update_avx_gen2) + FUNC_SAVE + mov keysize, %eax + cmp $32, %eax + je key_256_enc_update + cmp $16, %eax + je key_128_enc_update + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11 + FUNC_RESTORE + ret +key_128_enc_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9 + FUNC_RESTORE + ret +key_256_enc_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc_update_avx_gen2) ############################################################################### -#void aesni_gcm_dec_avx_gen2( +#void aesni_gcm_dec_update_avx_gen2( # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ # const u8 *in, /* Ciphertext input */ -# u64 plaintext_len, /* Length of data in Bytes for encryption. */ -# u8 *iv, /* Pre-counter block j0: 4 byte salt -# (from Security Association) concatenated with 8 byte -# Initialisation Vector (from IPSec ESP Payload) -# concatenated with 0x00000001. 16-byte aligned pointer. */ -# const u8 *aad, /* Additional Authentication Data (AAD)*/ -# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ +# u64 plaintext_len) /* Length of data in Bytes for encryption. 
*/ +############################################################################### +ENTRY(aesni_gcm_dec_update_avx_gen2) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_dec_update + cmp $16, %eax + je key_128_dec_update + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11 + FUNC_RESTORE + ret +key_128_dec_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9 + FUNC_RESTORE + ret +key_256_dec_update: + GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_dec_update_avx_gen2) + +############################################################################### +#void aesni_gcm_finalize_avx_gen2( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, # u8 *auth_tag, /* Authenticated Tag output. */ # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. # Valid values are 16 (most likely), 12 or 8. 
*/ ############################################################################### -ENTRY(aesni_gcm_dec_avx_gen2) - GCM_ENC_DEC_AVX DEC - ret -ENDPROC(aesni_gcm_dec_avx_gen2) +ENTRY(aesni_gcm_finalize_avx_gen2) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_finalize + cmp $16, %eax + je key_128_finalize + # must be 192 + GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4 + FUNC_RESTORE + ret +key_128_finalize: + GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4 + FUNC_RESTORE + ret +key_256_finalize: + GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_finalize_avx_gen2) + #endif /* CONFIG_AS_AVX */ #ifdef CONFIG_AS_AVX2 @@ -1670,113 +1922,42 @@ ENDPROC(aesni_gcm_dec_avx_gen2) # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i vmovdqa \HK, \T5 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly - vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly + vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly - vmovdqa \T5, HashKey_3(arg1) + vmovdqu \T5, HashKey_3(arg2) GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly - vmovdqa \T5, HashKey_4(arg1) + vmovdqu \T5, HashKey_4(arg2) GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly - vmovdqa \T5, HashKey_5(arg1) + vmovdqu \T5, HashKey_5(arg2) GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly - vmovdqa \T5, HashKey_6(arg1) + vmovdqu \T5, HashKey_6(arg2) GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly - vmovdqa \T5, HashKey_7(arg1) + vmovdqu \T5, HashKey_7(arg2) GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly - vmovdqa \T5, HashKey_8(arg1) + vmovdqu \T5, HashKey_8(arg2) .endm - ## if a = number of total plaintext bytes ## b = floor(a/16) ## num_initial_blocks = b mod 4# ## encrypt the initial 
num_initial_blocks blocks and apply ghash on the ciphertext ## r10, r11, r12, rax are clobbered -## arg1, arg2, arg3, r14 are used as a pointer only, not modified +## arg1, arg3, arg4, r14 are used as a pointer only, not modified -.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER +.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER i = (8-\num_initial_blocks) - j = 0 setreg - - mov arg6, %r10 # r10 = AAD - mov arg7, %r12 # r12 = aadLen - - - mov %r12, %r11 - - vpxor reg_j, reg_j, reg_j - vpxor reg_i, reg_i, reg_i - - cmp $16, %r11 - jl _get_AAD_rest8\@ -_get_AAD_blocks\@: - vmovdqu (%r10), reg_i - vpshufb SHUF_MASK(%rip), reg_i, reg_i - vpxor reg_i, reg_j, reg_j - GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 - add $16, %r10 - sub $16, %r12 - sub $16, %r11 - cmp $16, %r11 - jge _get_AAD_blocks\@ - vmovdqu reg_j, reg_i - cmp $0, %r11 - je _get_AAD_done\@ - - vpxor reg_i, reg_i, reg_i - - /* read the last <16B of AAD. since we have at least 4B of - data right after the AAD (the ICV, and maybe some CT), we can - read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\@: - cmp $4, %r11 - jle _get_AAD_rest4\@ - movq (%r10), \T1 - add $8, %r10 - sub $8, %r11 - vpslldq $8, \T1, \T1 - vpsrldq $8, reg_i, reg_i - vpxor \T1, reg_i, reg_i - jmp _get_AAD_rest8\@ -_get_AAD_rest4\@: - cmp $0, %r11 - jle _get_AAD_rest0\@ - mov (%r10), %eax - movq %rax, \T1 - add $4, %r10 - sub $4, %r11 - vpslldq $12, \T1, \T1 - vpsrldq $4, reg_i, reg_i - vpxor \T1, reg_i, reg_i -_get_AAD_rest0\@: - /* finalize: shift out the extra bytes we read, and align - left. 
since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - movdqu aad_shift_arr(%r11), \T1 - vpshufb \T1, reg_i, reg_i -_get_AAD_rest_final\@: - vpshufb SHUF_MASK(%rip), reg_i, reg_i - vpxor reg_j, reg_i, reg_i - GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 - -_get_AAD_done\@: - # initialize the data pointer offset as zero - xor %r11d, %r11d + vmovdqu AadHash(arg2), reg_i # start AES for num_initial_blocks blocks - mov arg5, %rax # rax = *Y0 - vmovdqu (%rax), \CTR # CTR = Y0 - vpshufb SHUF_MASK(%rip), \CTR, \CTR - + vmovdqu CurCount(arg2), \CTR i = (9-\num_initial_blocks) setreg @@ -1799,7 +1980,7 @@ _get_AAD_done\@: j = 1 setreg -.rep 9 +.rep \REP vmovdqa 16*j(arg1), \T_key i = (9-\num_initial_blocks) setreg @@ -1814,7 +1995,7 @@ _get_AAD_done\@: .endr - vmovdqa 16*10(arg1), \T_key + vmovdqa 16*j(arg1), \T_key i = (9-\num_initial_blocks) setreg .rep \num_initial_blocks @@ -1826,9 +2007,9 @@ _get_AAD_done\@: i = (9-\num_initial_blocks) setreg .rep \num_initial_blocks - vmovdqu (arg3, %r11), \T1 + vmovdqu (arg4, %r11), \T1 vpxor \T1, reg_i, reg_i - vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for + vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for # num_initial_blocks blocks add $16, %r11 .if \ENC_DEC == DEC @@ -1905,7 +2086,7 @@ _get_AAD_done\@: i = 1 setreg -.rep 9 # do 9 rounds +.rep \REP # do REP rounds vmovdqa 16*i(arg1), \T_key vaesenc \T_key, \XMM1, \XMM1 vaesenc \T_key, \XMM2, \XMM2 @@ -1930,58 +2111,58 @@ _get_AAD_done\@: vaesenclast \T_key, \XMM7, \XMM7 vaesenclast \T_key, \XMM8, \XMM8 - vmovdqu (arg3, %r11), \T1 + vmovdqu (arg4, %r11), \T1 vpxor \T1, \XMM1, \XMM1 - vmovdqu \XMM1, (arg2 , %r11) + vmovdqu \XMM1, (arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM1 .endif - vmovdqu 16*1(arg3, %r11), \T1 + vmovdqu 16*1(arg4, %r11), \T1 vpxor \T1, \XMM2, \XMM2 - vmovdqu \XMM2, 16*1(arg2 , %r11) + vmovdqu \XMM2, 16*1(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM2 
.endif - vmovdqu 16*2(arg3, %r11), \T1 + vmovdqu 16*2(arg4, %r11), \T1 vpxor \T1, \XMM3, \XMM3 - vmovdqu \XMM3, 16*2(arg2 , %r11) + vmovdqu \XMM3, 16*2(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM3 .endif - vmovdqu 16*3(arg3, %r11), \T1 + vmovdqu 16*3(arg4, %r11), \T1 vpxor \T1, \XMM4, \XMM4 - vmovdqu \XMM4, 16*3(arg2 , %r11) + vmovdqu \XMM4, 16*3(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM4 .endif - vmovdqu 16*4(arg3, %r11), \T1 + vmovdqu 16*4(arg4, %r11), \T1 vpxor \T1, \XMM5, \XMM5 - vmovdqu \XMM5, 16*4(arg2 , %r11) + vmovdqu \XMM5, 16*4(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM5 .endif - vmovdqu 16*5(arg3, %r11), \T1 + vmovdqu 16*5(arg4, %r11), \T1 vpxor \T1, \XMM6, \XMM6 - vmovdqu \XMM6, 16*5(arg2 , %r11) + vmovdqu \XMM6, 16*5(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM6 .endif - vmovdqu 16*6(arg3, %r11), \T1 + vmovdqu 16*6(arg4, %r11), \T1 vpxor \T1, \XMM7, \XMM7 - vmovdqu \XMM7, 16*6(arg2 , %r11) + vmovdqu \XMM7, 16*6(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM7 .endif - vmovdqu 16*7(arg3, %r11), \T1 + vmovdqu 16*7(arg4, %r11), \T1 vpxor \T1, \XMM8, \XMM8 - vmovdqu \XMM8, 16*7(arg2 , %r11) + vmovdqu \XMM8, 16*7(arg3 , %r11) .if \ENC_DEC == DEC vmovdqa \T1, \XMM8 .endif @@ -2010,9 +2191,9 @@ _initial_blocks_done\@: # encrypt 8 blocks at a time # ghash the 8 previously encrypted ciphertext blocks -# arg1, arg2, arg3 are used as pointers only, not modified +# arg1, arg3, arg4 are used as pointers only, not modified # r11 is the data offset value -.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC +.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC vmovdqa \XMM1, \T2 vmovdqa \XMM2, TMP2(%rsp) @@ -2096,7 +2277,7 @@ _initial_blocks_done\@: ####################################################################### - vmovdqa HashKey_8(arg1), \T5 + vmovdqu HashKey_8(arg2), \T5 vpclmulqdq 
$0x11, \T5, \T2, \T4 # T4 = a1*b1 vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 @@ -2114,7 +2295,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP2(%rsp), \T1 - vmovdqa HashKey_7(arg1), \T5 + vmovdqu HashKey_7(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 @@ -2140,7 +2321,7 @@ _initial_blocks_done\@: ####################################################################### vmovdqa TMP3(%rsp), \T1 - vmovdqa HashKey_6(arg1), \T5 + vmovdqu HashKey_6(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 @@ -2164,7 +2345,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP4(%rsp), \T1 - vmovdqa HashKey_5(arg1), \T5 + vmovdqu HashKey_5(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 @@ -2189,7 +2370,7 @@ _initial_blocks_done\@: vmovdqa TMP5(%rsp), \T1 - vmovdqa HashKey_4(arg1), \T5 + vmovdqu HashKey_4(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 @@ -2213,7 +2394,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP6(%rsp), \T1 - vmovdqa HashKey_3(arg1), \T5 + vmovdqu HashKey_3(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 @@ -2237,7 +2418,7 @@ _initial_blocks_done\@: vaesenc \T1, \XMM8, \XMM8 vmovdqa TMP7(%rsp), \T1 - vmovdqa HashKey_2(arg1), \T5 + vmovdqu HashKey_2(arg2), \T5 vpclmulqdq $0x11, \T5, \T1, \T3 vpxor \T3, \T4, \T4 @@ -2264,7 +2445,7 @@ _initial_blocks_done\@: vaesenc \T5, \XMM8, \XMM8 vmovdqa TMP8(%rsp), \T1 - vmovdqa HashKey(arg1), \T5 + vmovdqu HashKey(arg2), \T5 vpclmulqdq $0x00, \T5, \T1, \T3 vpxor \T3, \T7, \T7 @@ -2281,17 +2462,34 @@ _initial_blocks_done\@: vmovdqu 16*10(arg1), \T5 + i = 11 + setreg +.rep (\REP-9) + vaesenc \T5, \XMM1, \XMM1 + vaesenc \T5, \XMM2, \XMM2 + vaesenc \T5, \XMM3, \XMM3 + vaesenc \T5, \XMM4, \XMM4 + vaesenc \T5, \XMM5, \XMM5 + vaesenc \T5, \XMM6, \XMM6 + vaesenc \T5, \XMM7, \XMM7 + vaesenc \T5, \XMM8, \XMM8 + + vmovdqu 16*i(arg1), \T5 + i = i + 1 + setreg +.endr + i 
= 0 j = 1 setreg .rep 8 - vpxor 16*i(arg3, %r11), \T5, \T2 + vpxor 16*i(arg4, %r11), \T5, \T2 .if \ENC_DEC == ENC vaesenclast \T2, reg_j, reg_j .else vaesenclast \T2, reg_j, \T3 - vmovdqu 16*i(arg3, %r11), reg_j - vmovdqu \T3, 16*i(arg2, %r11) + vmovdqu 16*i(arg4, %r11), reg_j + vmovdqu \T3, 16*i(arg3, %r11) .endif i = (i+1) j = (j+1) @@ -2317,14 +2515,14 @@ _initial_blocks_done\@: vpxor \T2, \T7, \T7 # first phase of the reduction complete ####################################################################### .if \ENC_DEC == ENC - vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer - vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer + vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer .endif ####################################################################### @@ -2361,7 +2559,7 @@ _initial_blocks_done\@: ## Karatsuba Method - vmovdqa HashKey_8(arg1), \T5 + vmovdqu HashKey_8(arg2), \T5 vpshufd $0b01001110, \XMM1, \T2 vpshufd $0b01001110, \T5, \T3 @@ -2375,7 +2573,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey_7(arg1), \T5 + vmovdqu HashKey_7(arg2), \T5 
vpshufd $0b01001110, \XMM2, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM2, \T2, \T2 @@ -2393,7 +2591,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey_6(arg1), \T5 + vmovdqu HashKey_6(arg2), \T5 vpshufd $0b01001110, \XMM3, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM3, \T2, \T2 @@ -2411,7 +2609,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey_5(arg1), \T5 + vmovdqu HashKey_5(arg2), \T5 vpshufd $0b01001110, \XMM4, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM4, \T2, \T2 @@ -2429,7 +2627,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey_4(arg1), \T5 + vmovdqu HashKey_4(arg2), \T5 vpshufd $0b01001110, \XMM5, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM5, \T2, \T2 @@ -2447,7 +2645,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey_3(arg1), \T5 + vmovdqu HashKey_3(arg2), \T5 vpshufd $0b01001110, \XMM6, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM6, \T2, \T2 @@ -2465,7 +2663,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey_2(arg1), \T5 + vmovdqu HashKey_2(arg2), \T5 vpshufd $0b01001110, \XMM7, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM7, \T2, \T2 @@ -2483,7 +2681,7 @@ _initial_blocks_done\@: ###################### - vmovdqa HashKey(arg1), \T5 + vmovdqu HashKey(arg2), \T5 vpshufd $0b01001110, \XMM8, \T2 vpshufd $0b01001110, \T5, \T3 vpxor \XMM8, \T2, \T2 @@ -2536,411 +2734,110 @@ _initial_blocks_done\@: -# combined for GCM encrypt and decrypt functions -# clobbering all xmm registers -# clobbering r10, r11, r12, r13, r14, r15 -.macro GCM_ENC_DEC_AVX2 ENC_DEC - - #the number of pushes must equal STACK_OFFSET - push %r12 - push %r13 - push %r14 - push %r15 - - mov %rsp, %r14 - - - - - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - - - vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey - - mov arg4, %r13 # save the number of bytes of plaintext/ciphertext - and $-16, %r13 # r13 = r13 - (r13 mod 16) - - mov %r13, %r12 - shr $4, %r12 - and $7, %r12 - jz 
_initial_num_blocks_is_0\@ - - cmp $7, %r12 - je _initial_num_blocks_is_7\@ - cmp $6, %r12 - je _initial_num_blocks_is_6\@ - cmp $5, %r12 - je _initial_num_blocks_is_5\@ - cmp $4, %r12 - je _initial_num_blocks_is_4\@ - cmp $3, %r12 - je _initial_num_blocks_is_3\@ - cmp $2, %r12 - je _initial_num_blocks_is_2\@ - - jmp _initial_num_blocks_is_1\@ - -_initial_num_blocks_is_7\@: - INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*7, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_6\@: - INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*6, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_5\@: - INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*5, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_4\@: - INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*4, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_3\@: - INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*3, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_2\@: - INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*2, %r13 - jmp _initial_blocks_encrypted\@ - -_initial_num_blocks_is_1\@: - INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - sub $16*1, %r13 - jmp _initial_blocks_encrypted\@ - 
-_initial_num_blocks_is_0\@: - INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC - - -_initial_blocks_encrypted\@: - cmp $0, %r13 - je _zero_cipher_left\@ - - sub $128, %r13 - je _eight_cipher_left\@ - - - - - vmovd %xmm9, %r15d - and $255, %r15d - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - - -_encrypt_by_8_new\@: - cmp $(255-8), %r15d - jg _encrypt_by_8\@ - - - - add $8, %r15b - GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC - add $128, %r11 - sub $128, %r13 - jne _encrypt_by_8_new\@ - - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - jmp _eight_cipher_left\@ - -_encrypt_by_8\@: - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - add $8, %r15b - GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - add $128, %r11 - sub $128, %r13 - jne _encrypt_by_8_new\@ - - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - - - - -_eight_cipher_left\@: - GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 - - -_zero_cipher_left\@: - cmp $16, arg4 - jl _only_less_than_16\@ - - mov arg4, %r13 - and $15, %r13 # r13 = (arg4 mod 16) - - je _multiple_of_16_bytes\@ - - # handle the last <16 Byte block seperately - - - vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) - - sub $16, %r11 - add %r13, %r11 - vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block - - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 # adjust the shuffle mask pointer - # to be able to shift 16-r13 bytes - # (r13 is the number of bytes in plaintext mod 16) - vmovdqu (%r12), %xmm2 # get the appropriate shuffle 
mask - vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes - jmp _final_ghash_mul\@ - -_only_less_than_16\@: - # check for 0 length - mov arg4, %r13 - and $15, %r13 # r13 = (arg4 mod 16) - - je _multiple_of_16_bytes\@ - - # handle the last <16 Byte block seperately - - - vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) - - - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 # adjust the shuffle mask pointer to be - # able to shift 16-r13 bytes (r13 is the - # number of bytes in plaintext mod 16) - -_get_last_16_byte_loop\@: - movb (arg3, %r11), %al - movb %al, TMP1 (%rsp , %r11) - add $1, %r11 - cmp %r13, %r11 - jne _get_last_16_byte_loop\@ - - vmovdqu TMP1(%rsp), %xmm1 - - sub $16, %r11 - -_final_ghash_mul\@: - .if \ENC_DEC == DEC - vmovdqa %xmm1, %xmm2 - vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) - vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 - vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 - vpand %xmm1, %xmm2, %xmm2 - vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 - vpxor %xmm2, %xmm14, %xmm14 - #GHASH computation for the last <16 Byte block - GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 - sub %r13, %r11 - add $16, %r11 - .else - vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) - vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 - vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - vpxor %xmm9, %xmm14, %xmm14 - #GHASH computation for the last <16 Byte block - GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 - sub %r13, %r11 - add $16, %r11 - vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext - .endif - - - ############################# - # output r13 Bytes - vmovq %xmm9, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left\@ - - mov %rax, 
(arg2 , %r11) - add $8, %r11 - vpsrldq $8, %xmm9, %xmm9 - vmovq %xmm9, %rax - sub $8, %r13 - -_less_than_8_bytes_left\@: - movb %al, (arg2 , %r11) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left\@ - ############################# - -_multiple_of_16_bytes\@: - mov arg7, %r12 # r12 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - vmovd %r12d, %xmm15 # len(A) in xmm15 - - shl $3, arg4 # len(C) in bits (*128) - vmovq arg4, %xmm1 - vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 - vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) - - vpxor %xmm15, %xmm14, %xmm14 - GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation - vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap - - mov arg5, %rax # rax = *Y0 - vmovdqu (%rax), %xmm9 # xmm9 = Y0 - - ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) - - vpxor %xmm14, %xmm9, %xmm9 - - - -_return_T\@: - mov arg8, %r10 # r10 = authTag - mov arg9, %r11 # r11 = auth_tag_len - - cmp $16, %r11 - je _T_16\@ - - cmp $8, %r11 - jl _T_4\@ - -_T_8\@: - vmovq %xmm9, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - vpsrldq $8, %xmm9, %xmm9 - cmp $0, %r11 - je _return_T_done\@ -_T_4\@: - vmovd %xmm9, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - vpsrldq $4, %xmm9, %xmm9 - cmp $0, %r11 - je _return_T_done\@ -_T_123\@: - vmovd %xmm9, %eax - cmp $2, %r11 - jl _T_1\@ - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done\@ - add $2, %r10 - sar $16, %eax -_T_1\@: - mov %al, (%r10) - jmp _return_T_done\@ - -_T_16\@: - vmovdqu %xmm9, (%r10) - -_return_T_done\@: - mov %r14, %rsp - - pop %r15 - pop %r14 - pop %r13 - pop %r12 -.endm - - ############################################################# -#void aesni_gcm_precomp_avx_gen4 +#void aesni_gcm_init_avx_gen4 # (gcm_data *my_ctx_data, -# u8 *hash_subkey)# /* H, the Hash sub key input. -# Data starts on a 16-byte boundary. 
*/ -############################################################# -ENTRY(aesni_gcm_precomp_avx_gen4) - #the number of pushes must equal STACK_OFFSET - push %r12 - push %r13 - push %r14 - push %r15 - - mov %rsp, %r14 - - - - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - - vmovdqu (arg2), %xmm6 # xmm6 = HashKey - - vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 - ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey - vmovdqa %xmm6, %xmm2 - vpsllq $1, %xmm6, %xmm6 - vpsrlq $63, %xmm2, %xmm2 - vmovdqa %xmm2, %xmm1 - vpslldq $8, %xmm2, %xmm2 - vpsrldq $8, %xmm1, %xmm1 - vpor %xmm2, %xmm6, %xmm6 - #reduction - vpshufd $0b00100100, %xmm1, %xmm2 - vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 - vpand POLY(%rip), %xmm2, %xmm2 - vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly - ####################################################################### - vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly - - - PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 - - mov %r14, %rsp - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - ret -ENDPROC(aesni_gcm_precomp_avx_gen4) - - -############################################################################### -#void aesni_gcm_enc_avx_gen4( -# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ -# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ -# const u8 *in, /* Plaintext input */ -# u64 plaintext_len, /* Length of data in Bytes for encryption. */ -# u8 *iv, /* Pre-counter block j0: 4 byte salt -# (from Security Association) concatenated with 8 byte -# Initialisation Vector (from IPSec ESP Payload) -# concatenated with 0x00000001. 16-byte aligned pointer. */ -# const u8 *aad, /* Additional Authentication Data (AAD)*/ -# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ -# u8 *auth_tag, /* Authenticated Tag output. */ -# u64 auth_tag_len)# /* Authenticated Tag Length in bytes. -# Valid values are 16 (most likely), 12 or 8. 
*/ -############################################################################### -ENTRY(aesni_gcm_enc_avx_gen4) - GCM_ENC_DEC_AVX2 ENC - ret -ENDPROC(aesni_gcm_enc_avx_gen4) - -############################################################################### -#void aesni_gcm_dec_avx_gen4( -# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ -# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ -# const u8 *in, /* Ciphertext input */ -# u64 plaintext_len, /* Length of data in Bytes for encryption. */ +# gcm_context_data *data, # u8 *iv, /* Pre-counter block j0: 4 byte salt # (from Security Association) concatenated with 8 byte # Initialisation Vector (from IPSec ESP Payload) # concatenated with 0x00000001. 16-byte aligned pointer. */ +# u8 *hash_subkey, /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ # const u8 *aad, /* Additional Authentication Data (AAD)*/ -# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ +# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ +############################################################# +ENTRY(aesni_gcm_init_avx_gen4) + FUNC_SAVE + INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_init_avx_gen4) + +############################################################################### +#void aesni_gcm_enc_update_avx_gen4( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ +# const u8 *in, /* Plaintext input */ +# u64 plaintext_len) /* Length of data in Bytes for encryption.
*/ +############################################################################### +ENTRY(aesni_gcm_enc_update_avx_gen4) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_enc_update4 + cmp $16, %eax + je key_128_enc_update4 + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11 + FUNC_RESTORE + ret +key_128_enc_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9 + FUNC_RESTORE + ret +key_256_enc_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc_update_avx_gen4) + +############################################################################### +#void aesni_gcm_dec_update_avx_gen4( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, +# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ +# const u8 *in, /* Ciphertext input */ +# u64 ciphertext_len) /* Length of data in Bytes for decryption.
*/ +############################################################################### +ENTRY(aesni_gcm_dec_update_avx_gen4) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_dec_update4 + cmp $16, %eax + je key_128_dec_update4 + # must be 192 + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11 + FUNC_RESTORE + ret +key_128_dec_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9 + FUNC_RESTORE + ret +key_256_dec_update4: + GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_dec_update_avx_gen4) + +############################################################################### +#void aesni_gcm_finalize_avx_gen4( +# gcm_data *my_ctx_data, /* aligned to 16 Bytes */ +# gcm_context_data *data, # u8 *auth_tag, /* Authenticated Tag output. */ # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. -# Valid values are 16 (most likely), 12 or 8. */ +# Valid values are 16 (most likely), 12 or 8. 
*/ ############################################################################### -ENTRY(aesni_gcm_dec_avx_gen4) - GCM_ENC_DEC_AVX2 DEC - ret -ENDPROC(aesni_gcm_dec_avx_gen4) +ENTRY(aesni_gcm_finalize_avx_gen4) + FUNC_SAVE + mov keysize,%eax + cmp $32, %eax + je key_256_finalize4 + cmp $16, %eax + je key_128_finalize4 + # must be 192 + GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4 + FUNC_RESTORE + ret +key_128_finalize4: + GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4 + FUNC_RESTORE + ret +key_256_finalize4: + GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_finalize_avx_gen4) #endif /* CONFIG_AS_AVX2 */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 661f7daf43da..1321700d6647 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -84,7 +84,7 @@ struct gcm_context_data { u8 current_counter[GCM_BLOCK_LEN]; u64 partial_block_len; u64 unused; - u8 hash_keys[GCM_BLOCK_LEN * 8]; + u8 hash_keys[GCM_BLOCK_LEN * 16]; }; asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -175,6 +175,32 @@ asmlinkage void aesni_gcm_finalize(void *ctx, struct gcm_context_data *gdata, u8 *auth_tag, unsigned long auth_tag_len); +static struct aesni_gcm_tfm_s { +void (*init)(void *ctx, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, const u8 *aad, + unsigned long aad_len); +void (*enc_update)(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long plaintext_len); +void (*dec_update)(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +void (*finalize)(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); +} *aesni_gcm_tfm; + +struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = { + .init = &aesni_gcm_init, + .enc_update = &aesni_gcm_enc_update, + .dec_update = &aesni_gcm_dec_update, + .finalize = &aesni_gcm_finalize, +}; + #ifdef 
CONFIG_AS_AVX asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, void *keys, u8 *out, unsigned int num_bytes); @@ -183,136 +209,94 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, void *keys, u8 *out, unsigned int num_bytes); /* - * asmlinkage void aesni_gcm_precomp_avx_gen2() + * asmlinkage void aesni_gcm_init_avx_gen2() * gcm_data *my_ctx_data, context data * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. */ -asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey); +asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, + const u8 *aad, + unsigned long aad_len); -asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out, +asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); + +asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, +asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx(void *ctx, - struct gcm_context_data *data, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, 
unsigned long auth_tag_len) -{ - struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; - if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ - aesni_gcm_enc(ctx, data, out, in, - plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); - } else { - aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); - aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, - aad_len, auth_tag, auth_tag_len); - } -} +struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = { + .init = &aesni_gcm_init_avx_gen2, + .enc_update = &aesni_gcm_enc_update_avx_gen2, + .dec_update = &aesni_gcm_dec_update_avx_gen2, + .finalize = &aesni_gcm_finalize_avx_gen2, +}; -static void aesni_gcm_dec_avx(void *ctx, - struct gcm_context_data *data, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len) -{ - struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; - if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, data, out, in, - ciphertext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); - } else { - aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); - aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, - aad_len, auth_tag, auth_tag_len); - } -} #endif #ifdef CONFIG_AS_AVX2 /* - * asmlinkage void aesni_gcm_precomp_avx_gen4() + * asmlinkage void aesni_gcm_init_avx_gen4() * gcm_data *my_ctx_data, context data * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. 
*/ -asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey); +asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, + const u8 *aad, + unsigned long aad_len); -asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out, +asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); + +asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, +asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx2(void *ctx, - struct gcm_context_data *data, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len) -{ - struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; - if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_enc(ctx, data, out, in, - plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); - } else if (plaintext_len < AVX_GEN4_OPTSIZE) { - aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); - aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, - aad_len, auth_tag, auth_tag_len); - } else { - 
aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); - aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad, - aad_len, auth_tag, auth_tag_len); - } -} +struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = { + .init = &aesni_gcm_init_avx_gen4, + .enc_update = &aesni_gcm_enc_update_avx_gen4, + .dec_update = &aesni_gcm_dec_update_avx_gen4, + .finalize = &aesni_gcm_finalize_avx_gen4, +}; -static void aesni_gcm_dec_avx2(void *ctx, - struct gcm_context_data *data, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len) -{ - struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; - if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, data, out, in, - ciphertext_len, iv, hash_subkey, - aad, aad_len, auth_tag, auth_tag_len); - } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { - aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); - aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, - aad_len, auth_tag, auth_tag_len); - } else { - aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); - aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad, - aad_len, auth_tag, auth_tag_len); - } -} #endif -static void (*aesni_gcm_enc_tfm)(void *ctx, - struct gcm_context_data *data, u8 *out, - const u8 *in, unsigned long plaintext_len, - u8 *iv, u8 *hash_subkey, const u8 *aad, - unsigned long aad_len, u8 *auth_tag, - unsigned long auth_tag_len); - -static void (*aesni_gcm_dec_tfm)(void *ctx, - struct gcm_context_data *data, u8 *out, - const u8 *in, unsigned long ciphertext_len, - u8 *iv, u8 *hash_subkey, const u8 *aad, - unsigned long aad_len, u8 *auth_tag, - unsigned long auth_tag_len); - static inline struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) { @@ -794,6 +778,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); 
unsigned long auth_tag_len = crypto_aead_authsize(tfm); + struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; struct gcm_context_data data AESNI_ALIGN_ATTR; struct scatter_walk dst_sg_walk = {}; unsigned long left = req->cryptlen; @@ -811,6 +796,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, if (!enc) left -= auth_tag_len; +#ifdef CONFIG_AS_AVX2 + if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4) + gcm_tfm = &aesni_gcm_tfm_avx_gen2; +#endif +#ifdef CONFIG_AS_AVX + if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2) + gcm_tfm = &aesni_gcm_tfm_sse; +#endif + /* Linearize assoc, if not already linear */ if (req->src->length >= assoclen && req->src->length && (!PageHighMem(sg_page(req->src)) || @@ -835,7 +829,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, } kernel_fpu_begin(); - aesni_gcm_init(aes_ctx, &data, iv, + gcm_tfm->init(aes_ctx, &data, iv, hash_subkey, assoc, assoclen); if (req->src != req->dst) { while (left) { @@ -846,10 +840,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = min(srclen, dstlen); if (len) { if (enc) - aesni_gcm_enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, &data, dst, src, len); else - aesni_gcm_dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, &data, dst, src, len); } left -= len; @@ -867,10 +861,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = scatterwalk_clamp(&src_sg_walk, left); if (len) { if (enc) - aesni_gcm_enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, &data, src, src, len); else - aesni_gcm_dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, &data, src, src, len); } left -= len; @@ -879,7 +873,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, scatterwalk_done(&src_sg_walk, 1, left); } } - aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len); + gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len); kernel_fpu_end(); if (!assocmem) @@ 
-912,147 +906,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { - u8 one_entry_in_sg = 0; - u8 *src, *dst, *assoc; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned long auth_tag_len = crypto_aead_authsize(tfm); - struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk = {}; - struct gcm_context_data data AESNI_ALIGN_ATTR; - - if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || - aesni_gcm_enc_tfm == aesni_gcm_enc || - req->cryptlen < AVX_GEN2_OPTSIZE) { - return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, - aes_ctx); - } - if (sg_is_last(req->src) && - (!PageHighMem(sg_page(req->src)) || - req->src->offset + req->src->length <= PAGE_SIZE) && - sg_is_last(req->dst) && - (!PageHighMem(sg_page(req->dst)) || - req->dst->offset + req->dst->length <= PAGE_SIZE)) { - one_entry_in_sg = 1; - scatterwalk_start(&src_sg_walk, req->src); - assoc = scatterwalk_map(&src_sg_walk); - src = assoc + req->assoclen; - dst = src; - if (unlikely(req->src != req->dst)) { - scatterwalk_start(&dst_sg_walk, req->dst); - dst = scatterwalk_map(&dst_sg_walk) + req->assoclen; - } - } else { - /* Allocate memory for src, dst, assoc */ - assoc = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, - GFP_ATOMIC); - if (unlikely(!assoc)) - return -ENOMEM; - scatterwalk_map_and_copy(assoc, req->src, 0, - req->assoclen + req->cryptlen, 0); - src = assoc + req->assoclen; - dst = src; - } - - kernel_fpu_begin(); - aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv, - hash_subkey, assoc, assoclen, - dst + req->cryptlen, auth_tag_len); - kernel_fpu_end(); - - /* The authTag (aka the Integrity Check Value) needs to be written - * back to the packet. 
*/ - if (one_entry_in_sg) { - if (unlikely(req->src != req->dst)) { - scatterwalk_unmap(dst - req->assoclen); - scatterwalk_advance(&dst_sg_walk, req->dst->length); - scatterwalk_done(&dst_sg_walk, 1, 0); - } - scatterwalk_unmap(assoc); - scatterwalk_advance(&src_sg_walk, req->src->length); - scatterwalk_done(&src_sg_walk, req->src == req->dst, 0); - } else { - scatterwalk_map_and_copy(dst, req->dst, req->assoclen, - req->cryptlen + auth_tag_len, 1); - kfree(assoc); - } - return 0; + return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, + aes_ctx); } static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { - u8 one_entry_in_sg = 0; - u8 *src, *dst, *assoc; - unsigned long tempCipherLen = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned long auth_tag_len = crypto_aead_authsize(tfm); - u8 authTag[16]; - struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk = {}; - struct gcm_context_data data AESNI_ALIGN_ATTR; - int retval = 0; - - if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || - aesni_gcm_enc_tfm == aesni_gcm_enc || - req->cryptlen < AVX_GEN2_OPTSIZE) { - return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, - aes_ctx); - } - tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); - - if (sg_is_last(req->src) && - (!PageHighMem(sg_page(req->src)) || - req->src->offset + req->src->length <= PAGE_SIZE) && - sg_is_last(req->dst) && req->dst->length && - (!PageHighMem(sg_page(req->dst)) || - req->dst->offset + req->dst->length <= PAGE_SIZE)) { - one_entry_in_sg = 1; - scatterwalk_start(&src_sg_walk, req->src); - assoc = scatterwalk_map(&src_sg_walk); - src = assoc + req->assoclen; - dst = src; - if (unlikely(req->src != req->dst)) { - scatterwalk_start(&dst_sg_walk, req->dst); - dst = scatterwalk_map(&dst_sg_walk) + req->assoclen; - } - } else { - /* Allocate memory for src, dst, assoc */ - assoc = kmalloc(req->cryptlen + 
req->assoclen, GFP_ATOMIC); - if (!assoc) - return -ENOMEM; - scatterwalk_map_and_copy(assoc, req->src, 0, - req->assoclen + req->cryptlen, 0); - src = assoc + req->assoclen; - dst = src; - } - - - kernel_fpu_begin(); - aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv, - hash_subkey, assoc, assoclen, - authTag, auth_tag_len); - kernel_fpu_end(); - - /* Compare generated tag with passed in tag. */ - retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ? - -EBADMSG : 0; - - if (one_entry_in_sg) { - if (unlikely(req->src != req->dst)) { - scatterwalk_unmap(dst - req->assoclen); - scatterwalk_advance(&dst_sg_walk, req->dst->length); - scatterwalk_done(&dst_sg_walk, 1, 0); - } - scatterwalk_unmap(assoc); - scatterwalk_advance(&src_sg_walk, req->src->length); - scatterwalk_done(&src_sg_walk, req->src == req->dst, 0); - } else { - scatterwalk_map_and_copy(dst, req->dst, req->assoclen, - tempCipherLen, 1); - kfree(assoc); - } - return retval; - + return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, + aes_ctx); } static int helper_rfc4106_encrypt(struct aead_request *req) @@ -1420,21 +1282,18 @@ static int __init aesni_init(void) #ifdef CONFIG_AS_AVX2 if (boot_cpu_has(X86_FEATURE_AVX2)) { pr_info("AVX2 version of gcm_enc/dec engaged.\n"); - aesni_gcm_enc_tfm = aesni_gcm_enc_avx2; - aesni_gcm_dec_tfm = aesni_gcm_dec_avx2; + aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4; } else #endif #ifdef CONFIG_AS_AVX if (boot_cpu_has(X86_FEATURE_AVX)) { pr_info("AVX version of gcm_enc/dec engaged.\n"); - aesni_gcm_enc_tfm = aesni_gcm_enc_avx; - aesni_gcm_dec_tfm = aesni_gcm_dec_avx; + aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2; } else #endif { pr_info("SSE version of gcm_enc/dec engaged.\n"); - aesni_gcm_enc_tfm = aesni_gcm_enc; - aesni_gcm_dec_tfm = aesni_gcm_dec; + aesni_gcm_tfm = &aesni_gcm_tfm_sse; } aesni_ctr_enc_tfm = aesni_ctr_enc; #ifdef CONFIG_AS_AVX diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S new file 
mode 100644 index 000000000000..32903fd450af --- /dev/null +++ b/arch/x86/crypto/chacha-avx2-x86_64.S @@ -0,0 +1,1025 @@ +/* + * ChaCha 256-bit cipher algorithm, x64 AVX2 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +.section .rodata.cst32.ROT8, "aM", @progbits, 32 +.align 32 +ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 + .octa 0x0e0d0c0f0a09080b0605040702010003 + +.section .rodata.cst32.ROT16, "aM", @progbits, 32 +.align 32 +ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 + .octa 0x0d0c0f0e09080b0a0504070601000302 + +.section .rodata.cst32.CTRINC, "aM", @progbits, 32 +.align 32 +CTRINC: .octa 0x00000003000000020000000100000000 + .octa 0x00000007000000060000000500000004 + +.section .rodata.cst32.CTR2BL, "aM", @progbits, 32 +.align 32 +CTR2BL: .octa 0x00000000000000000000000000000000 + .octa 0x00000000000000000000000000000001 + +.section .rodata.cst32.CTR4BL, "aM", @progbits, 32 +.align 32 +CTR4BL: .octa 0x00000000000000000000000000000002 + .octa 0x00000000000000000000000000000003 + +.text + +ENTRY(chacha_2block_xor_avx2) + # %rdi: Input state matrix, s + # %rsi: up to 2 data blocks output, o + # %rdx: up to 2 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts two ChaCha blocks by loading the state + # matrix twice across four AVX registers. It performs matrix operations + # on four words in each matrix in parallel, but requires shuffling to + # rearrange the words after each round. 
+ + vzeroupper + + # x0..3[0-2] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + + vmovdqa %ymm0,%ymm8 + vmovdqa %ymm1,%ymm9 + vmovdqa %ymm2,%ymm10 + vmovdqa %ymm3,%ymm11 + + vmovdqa ROT8(%rip),%ymm4 + vmovdqa ROT16(%rip),%ymm5 + + mov %rcx,%rax + +.Ldoubleround: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm5,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm6 + vpslld $12,%ymm6,%ymm6 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm6,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm4,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm7 + vpslld $7,%ymm7,%ymm7 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm5,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm6 + vpslld $12,%ymm6,%ymm6 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm6,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm4,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm7 + vpslld $7,%ymm7,%ymm7 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 
= shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + + sub $2,%r8d + jnz .Ldoubleround + + # o0 = i0 ^ (x0 + s0) + vpaddd %ymm8,%ymm0,%ymm7 + cmp $0x10,%rax + jl .Lxorpart2 + vpxor 0x00(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x00(%rsi) + vextracti128 $1,%ymm7,%xmm0 + # o1 = i1 ^ (x1 + s1) + vpaddd %ymm9,%ymm1,%ymm7 + cmp $0x20,%rax + jl .Lxorpart2 + vpxor 0x10(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x10(%rsi) + vextracti128 $1,%ymm7,%xmm1 + # o2 = i2 ^ (x2 + s2) + vpaddd %ymm10,%ymm2,%ymm7 + cmp $0x30,%rax + jl .Lxorpart2 + vpxor 0x20(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x20(%rsi) + vextracti128 $1,%ymm7,%xmm2 + # o3 = i3 ^ (x3 + s3) + vpaddd %ymm11,%ymm3,%ymm7 + cmp $0x40,%rax + jl .Lxorpart2 + vpxor 0x30(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x30(%rsi) + vextracti128 $1,%ymm7,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm7 + cmp $0x50,%rax + jl .Lxorpart2 + vpxor 0x40(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x40(%rsi) + + vmovdqa %xmm1,%xmm7 + cmp $0x60,%rax + jl .Lxorpart2 + vpxor 0x50(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x50(%rsi) + + vmovdqa %xmm2,%xmm7 + cmp $0x70,%rax + jl .Lxorpart2 + vpxor 0x60(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x60(%rsi) + + vmovdqa %xmm3,%xmm7 + cmp $0x80,%rax + jl .Lxorpart2 + vpxor 0x70(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x70(%rsi) + +.Ldone2: + vzeroupper + ret + +.Lxorpart2: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone2 + and $~0x0f,%rax + + mov %rsi,%r11 + + lea 8(%rsp),%r10 + sub $0x10,%rsp + and $~31,%rsp + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + vpxor 0x00(%rsp),%xmm7,%xmm7 + vmovdqa %xmm7,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + lea -8(%r10),%rsp + jmp .Ldone2 + +ENDPROC(chacha_2block_xor_avx2) + +ENTRY(chacha_4block_xor_avx2) + # %rdi: Input state matrix, s + # %rsi: up to 4 data blocks output, o + # %rdx: up to 4 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This 
function encrypts four ChaCha blocks by loading the state + # matrix four times across eight AVX registers. It performs matrix + # operations on four words in two matrices in parallel, sequentially + # to the operations on the four words of the other two matrices. The + # required word shuffling has a rather high latency, we can do the + # arithmetic on two matrix-pairs without much slowdown. + + vzeroupper + + # x0..3[0-4] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vmovdqa %ymm0,%ymm4 + vmovdqa %ymm1,%ymm5 + vmovdqa %ymm2,%ymm6 + vmovdqa %ymm3,%ymm7 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + vpaddd CTR4BL(%rip),%ymm7,%ymm7 + + vmovdqa %ymm0,%ymm11 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm3,%ymm14 + vmovdqa %ymm7,%ymm15 + + vmovdqa ROT8(%rip),%ymm8 + vmovdqa ROT16(%rip),%ymm9 + + mov %rcx,%rax + +.Ldoubleround4: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm9,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm9,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $7,%ymm10,%ymm10 
+ vpsrld $25,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + vpshufd $0x39,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + vpshufd $0x93,%ymm7,%ymm7 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm9,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm9,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $12,%ymm10,%ymm10 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpshufb %ymm8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + vpshufb %ymm8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vmovdqa %ymm1,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm10,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm6,%ymm5,%ymm5 + vmovdqa %ymm5,%ymm10 + vpslld $7,%ymm10,%ymm10 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm10,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + vpshufd $0x93,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + vpshufd $0x39,%ymm7,%ymm7 + + sub $2,%r8d + jnz .Ldoubleround4 + + # o0 = i0 ^ (x0 + s0), first block + vpaddd %ymm11,%ymm0,%ymm10 + cmp $0x10,%rax + jl .Lxorpart4 + vpxor 0x00(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x00(%rsi) + vextracti128 $1,%ymm10,%xmm0 + # o1 = i1 ^ (x1 + s1), 
first block + vpaddd %ymm12,%ymm1,%ymm10 + cmp $0x20,%rax + jl .Lxorpart4 + vpxor 0x10(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x10(%rsi) + vextracti128 $1,%ymm10,%xmm1 + # o2 = i2 ^ (x2 + s2), first block + vpaddd %ymm13,%ymm2,%ymm10 + cmp $0x30,%rax + jl .Lxorpart4 + vpxor 0x20(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x20(%rsi) + vextracti128 $1,%ymm10,%xmm2 + # o3 = i3 ^ (x3 + s3), first block + vpaddd %ymm14,%ymm3,%ymm10 + cmp $0x40,%rax + jl .Lxorpart4 + vpxor 0x30(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x30(%rsi) + vextracti128 $1,%ymm10,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm10 + cmp $0x50,%rax + jl .Lxorpart4 + vpxor 0x40(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x40(%rsi) + + vmovdqa %xmm1,%xmm10 + cmp $0x60,%rax + jl .Lxorpart4 + vpxor 0x50(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x50(%rsi) + + vmovdqa %xmm2,%xmm10 + cmp $0x70,%rax + jl .Lxorpart4 + vpxor 0x60(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x60(%rsi) + + vmovdqa %xmm3,%xmm10 + cmp $0x80,%rax + jl .Lxorpart4 + vpxor 0x70(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x70(%rsi) + + # o0 = i0 ^ (x0 + s0), third block + vpaddd %ymm11,%ymm4,%ymm10 + cmp $0x90,%rax + jl .Lxorpart4 + vpxor 0x80(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x80(%rsi) + vextracti128 $1,%ymm10,%xmm4 + # o1 = i1 ^ (x1 + s1), third block + vpaddd %ymm12,%ymm5,%ymm10 + cmp $0xa0,%rax + jl .Lxorpart4 + vpxor 0x90(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x90(%rsi) + vextracti128 $1,%ymm10,%xmm5 + # o2 = i2 ^ (x2 + s2), third block + vpaddd %ymm13,%ymm6,%ymm10 + cmp $0xb0,%rax + jl .Lxorpart4 + vpxor 0xa0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xa0(%rsi) + vextracti128 $1,%ymm10,%xmm6 + # o3 = i3 ^ (x3 + s3), third block + vpaddd %ymm15,%ymm7,%ymm10 + cmp $0xc0,%rax + jl .Lxorpart4 + vpxor 0xb0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xb0(%rsi) + vextracti128 $1,%ymm10,%xmm7 + + # xor and write fourth block + vmovdqa %xmm4,%xmm10 + cmp $0xd0,%rax + jl .Lxorpart4 + vpxor 0xc0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xc0(%rsi) + + vmovdqa %xmm5,%xmm10 + cmp $0xe0,%rax + jl 
.Lxorpart4 + vpxor 0xd0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xd0(%rsi) + + vmovdqa %xmm6,%xmm10 + cmp $0xf0,%rax + jl .Lxorpart4 + vpxor 0xe0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xe0(%rsi) + + vmovdqa %xmm7,%xmm10 + cmp $0x100,%rax + jl .Lxorpart4 + vpxor 0xf0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xf0(%rsi) + +.Ldone4: + vzeroupper + ret + +.Lxorpart4: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone4 + and $~0x0f,%rax + + mov %rsi,%r11 + + lea 8(%rsp),%r10 + sub $0x10,%rsp + and $~31,%rsp + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + vpxor 0x00(%rsp),%xmm10,%xmm10 + vmovdqa %xmm10,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + lea -8(%r10),%rsp + jmp .Ldone4 + +ENDPROC(chacha_4block_xor_avx2) + +ENTRY(chacha_8block_xor_avx2) + # %rdi: Input state matrix, s + # %rsi: up to 8 data blocks output, o + # %rdx: up to 8 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts eight consecutive ChaCha blocks by loading + # the state matrix in AVX registers eight times. As we need some + # scratch registers, we save the first four registers on the stack. The + # algorithm performs each operation on the corresponding word of each + # state matrix, hence requires no word shuffling. For final XORing step + # we transpose the matrix by interleaving 32-, 64- and then 128-bit + # words, which allows us to do XOR in AVX registers. 8/16-bit word + # rotation is done with the slightly better performing byte shuffling, + # 7/12-bit word rotation uses traditional shift+OR. 
+ + vzeroupper + # 4 * 32 byte stack, 32-byte aligned + lea 8(%rsp),%r10 + and $~31, %rsp + sub $0x80, %rsp + mov %rcx,%rax + + # x0..15[0-7] = s[0..15] + vpbroadcastd 0x00(%rdi),%ymm0 + vpbroadcastd 0x04(%rdi),%ymm1 + vpbroadcastd 0x08(%rdi),%ymm2 + vpbroadcastd 0x0c(%rdi),%ymm3 + vpbroadcastd 0x10(%rdi),%ymm4 + vpbroadcastd 0x14(%rdi),%ymm5 + vpbroadcastd 0x18(%rdi),%ymm6 + vpbroadcastd 0x1c(%rdi),%ymm7 + vpbroadcastd 0x20(%rdi),%ymm8 + vpbroadcastd 0x24(%rdi),%ymm9 + vpbroadcastd 0x28(%rdi),%ymm10 + vpbroadcastd 0x2c(%rdi),%ymm11 + vpbroadcastd 0x30(%rdi),%ymm12 + vpbroadcastd 0x34(%rdi),%ymm13 + vpbroadcastd 0x38(%rdi),%ymm14 + vpbroadcastd 0x3c(%rdi),%ymm15 + # x0..3 on stack + vmovdqa %ymm0,0x00(%rsp) + vmovdqa %ymm1,0x20(%rsp) + vmovdqa %ymm2,0x40(%rsp) + vmovdqa %ymm3,0x60(%rsp) + + vmovdqa CTRINC(%rip),%ymm1 + vmovdqa ROT8(%rip),%ymm2 + vmovdqa ROT16(%rip),%ymm3 + + # x12 += counter values 0-7 + vpaddd %ymm1,%ymm12,%ymm12 + +.Ldoubleround8: + # x0 += x4, x12 = rotl32(x12 ^ x0, 16) + vpaddd 0x00(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm3,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 16) + vpaddd 0x20(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm3,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 16) + vpaddd 0x40(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm3,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vpaddd 0x60(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm3,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 12) + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $12,%ymm4,%ymm0 + vpsrld $20,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 12) + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $12,%ymm5,%ymm0 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 12) + vpaddd
%ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $12,%ymm6,%ymm0 + vpsrld $20,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vpaddd %ymm15,%ymm11,%ymm11 + vpxor %ymm11,%ymm7,%ymm7 + vpslld $12,%ymm7,%ymm0 + vpsrld $20,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + + # x0 += x4, x12 = rotl32(x12 ^ x0, 8) + vpaddd 0x00(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm2,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 8) + vpaddd 0x20(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm2,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 8) + vpaddd 0x40(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm2,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vpaddd 0x60(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm2,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 7) + vpaddd %ymm12,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm0 + vpsrld $25,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 7) + vpaddd %ymm13,%ymm9,%ymm9 + vpxor %ymm9,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm0 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 7) + vpaddd %ymm14,%ymm10,%ymm10 + vpxor %ymm10,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm0 + vpsrld $25,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vpaddd %ymm15,%ymm11,%ymm11 + vpxor %ymm11,%ymm7,%ymm7 + vpslld $7,%ymm7,%ymm0 + vpsrld $25,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 16) + vpaddd 0x00(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm3,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 16) + vpaddd 0x20(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm3,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 16) + vpaddd
0x40(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm3,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vpaddd 0x60(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm3,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 12) + vpaddd %ymm15,%ymm10,%ymm10 + vpxor %ymm10,%ymm5,%ymm5 + vpslld $12,%ymm5,%ymm0 + vpsrld $20,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 12) + vpaddd %ymm12,%ymm11,%ymm11 + vpxor %ymm11,%ymm6,%ymm6 + vpslld $12,%ymm6,%ymm0 + vpsrld $20,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 12) + vpaddd %ymm13,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpslld $12,%ymm7,%ymm0 + vpsrld $20,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vpaddd %ymm14,%ymm9,%ymm9 + vpxor %ymm9,%ymm4,%ymm4 + vpslld $12,%ymm4,%ymm0 + vpsrld $20,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 8) + vpaddd 0x00(%rsp),%ymm5,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpxor %ymm0,%ymm15,%ymm15 + vpshufb %ymm2,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 8) + vpaddd 0x20(%rsp),%ymm6,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpxor %ymm0,%ymm12,%ymm12 + vpshufb %ymm2,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 8) + vpaddd 0x40(%rsp),%ymm7,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpxor %ymm0,%ymm13,%ymm13 + vpshufb %ymm2,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vpaddd 0x60(%rsp),%ymm4,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpxor %ymm0,%ymm14,%ymm14 + vpshufb %ymm2,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 7) + vpaddd %ymm15,%ymm10,%ymm10 + vpxor %ymm10,%ymm5,%ymm5 + vpslld $7,%ymm5,%ymm0 + vpsrld $25,%ymm5,%ymm5 + vpor %ymm0,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 7) + vpaddd %ymm12,%ymm11,%ymm11 + vpxor %ymm11,%ymm6,%ymm6 + vpslld $7,%ymm6,%ymm0 + vpsrld $25,%ymm6,%ymm6 + vpor %ymm0,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 7) + vpaddd %ymm13,%ymm8,%ymm8 + 
vpxor %ymm8,%ymm7,%ymm7 + vpslld $7,%ymm7,%ymm0 + vpsrld $25,%ymm7,%ymm7 + vpor %ymm0,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vpaddd %ymm14,%ymm9,%ymm9 + vpxor %ymm9,%ymm4,%ymm4 + vpslld $7,%ymm4,%ymm0 + vpsrld $25,%ymm4,%ymm4 + vpor %ymm0,%ymm4,%ymm4 + + sub $2,%r8d + jnz .Ldoubleround8 + + # x0..15[0-7] += s[0..15] + vpbroadcastd 0x00(%rdi),%ymm0 + vpaddd 0x00(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + vpbroadcastd 0x04(%rdi),%ymm0 + vpaddd 0x20(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x20(%rsp) + vpbroadcastd 0x08(%rdi),%ymm0 + vpaddd 0x40(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x40(%rsp) + vpbroadcastd 0x0c(%rdi),%ymm0 + vpaddd 0x60(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x60(%rsp) + vpbroadcastd 0x10(%rdi),%ymm0 + vpaddd %ymm0,%ymm4,%ymm4 + vpbroadcastd 0x14(%rdi),%ymm0 + vpaddd %ymm0,%ymm5,%ymm5 + vpbroadcastd 0x18(%rdi),%ymm0 + vpaddd %ymm0,%ymm6,%ymm6 + vpbroadcastd 0x1c(%rdi),%ymm0 + vpaddd %ymm0,%ymm7,%ymm7 + vpbroadcastd 0x20(%rdi),%ymm0 + vpaddd %ymm0,%ymm8,%ymm8 + vpbroadcastd 0x24(%rdi),%ymm0 + vpaddd %ymm0,%ymm9,%ymm9 + vpbroadcastd 0x28(%rdi),%ymm0 + vpaddd %ymm0,%ymm10,%ymm10 + vpbroadcastd 0x2c(%rdi),%ymm0 + vpaddd %ymm0,%ymm11,%ymm11 + vpbroadcastd 0x30(%rdi),%ymm0 + vpaddd %ymm0,%ymm12,%ymm12 + vpbroadcastd 0x34(%rdi),%ymm0 + vpaddd %ymm0,%ymm13,%ymm13 + vpbroadcastd 0x38(%rdi),%ymm0 + vpaddd %ymm0,%ymm14,%ymm14 + vpbroadcastd 0x3c(%rdi),%ymm0 + vpaddd %ymm0,%ymm15,%ymm15 + + # x12 += counter values 0-7 + vpaddd %ymm1,%ymm12,%ymm12 + + # interleave 32-bit words in state n, n+1 + vmovdqa 0x00(%rsp),%ymm0 + vmovdqa 0x20(%rsp),%ymm1 + vpunpckldq %ymm1,%ymm0,%ymm2 + vpunpckhdq %ymm1,%ymm0,%ymm1 + vmovdqa %ymm2,0x00(%rsp) + vmovdqa %ymm1,0x20(%rsp) + vmovdqa 0x40(%rsp),%ymm0 + vmovdqa 0x60(%rsp),%ymm1 + vpunpckldq %ymm1,%ymm0,%ymm2 + vpunpckhdq %ymm1,%ymm0,%ymm1 + vmovdqa %ymm2,0x40(%rsp) + vmovdqa %ymm1,0x60(%rsp) + vmovdqa %ymm4,%ymm0 + vpunpckldq %ymm5,%ymm0,%ymm4 + vpunpckhdq %ymm5,%ymm0,%ymm5 + vmovdqa %ymm6,%ymm0 + vpunpckldq %ymm7,%ymm0,%ymm6
+ vpunpckhdq %ymm7,%ymm0,%ymm7 + vmovdqa %ymm8,%ymm0 + vpunpckldq %ymm9,%ymm0,%ymm8 + vpunpckhdq %ymm9,%ymm0,%ymm9 + vmovdqa %ymm10,%ymm0 + vpunpckldq %ymm11,%ymm0,%ymm10 + vpunpckhdq %ymm11,%ymm0,%ymm11 + vmovdqa %ymm12,%ymm0 + vpunpckldq %ymm13,%ymm0,%ymm12 + vpunpckhdq %ymm13,%ymm0,%ymm13 + vmovdqa %ymm14,%ymm0 + vpunpckldq %ymm15,%ymm0,%ymm14 + vpunpckhdq %ymm15,%ymm0,%ymm15 + + # interleave 64-bit words in state n, n+2 + vmovdqa 0x00(%rsp),%ymm0 + vmovdqa 0x40(%rsp),%ymm2 + vpunpcklqdq %ymm2,%ymm0,%ymm1 + vpunpckhqdq %ymm2,%ymm0,%ymm2 + vmovdqa %ymm1,0x00(%rsp) + vmovdqa %ymm2,0x40(%rsp) + vmovdqa 0x20(%rsp),%ymm0 + vmovdqa 0x60(%rsp),%ymm2 + vpunpcklqdq %ymm2,%ymm0,%ymm1 + vpunpckhqdq %ymm2,%ymm0,%ymm2 + vmovdqa %ymm1,0x20(%rsp) + vmovdqa %ymm2,0x60(%rsp) + vmovdqa %ymm4,%ymm0 + vpunpcklqdq %ymm6,%ymm0,%ymm4 + vpunpckhqdq %ymm6,%ymm0,%ymm6 + vmovdqa %ymm5,%ymm0 + vpunpcklqdq %ymm7,%ymm0,%ymm5 + vpunpckhqdq %ymm7,%ymm0,%ymm7 + vmovdqa %ymm8,%ymm0 + vpunpcklqdq %ymm10,%ymm0,%ymm8 + vpunpckhqdq %ymm10,%ymm0,%ymm10 + vmovdqa %ymm9,%ymm0 + vpunpcklqdq %ymm11,%ymm0,%ymm9 + vpunpckhqdq %ymm11,%ymm0,%ymm11 + vmovdqa %ymm12,%ymm0 + vpunpcklqdq %ymm14,%ymm0,%ymm12 + vpunpckhqdq %ymm14,%ymm0,%ymm14 + vmovdqa %ymm13,%ymm0 + vpunpcklqdq %ymm15,%ymm0,%ymm13 + vpunpckhqdq %ymm15,%ymm0,%ymm15 + + # interleave 128-bit words in state n, n+4 + # xor/write first four blocks + vmovdqa 0x00(%rsp),%ymm1 + vperm2i128 $0x20,%ymm4,%ymm1,%ymm0 + cmp $0x0020,%rax + jl .Lxorpart8 + vpxor 0x0000(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0000(%rsi) + vperm2i128 $0x31,%ymm4,%ymm1,%ymm4 + + vperm2i128 $0x20,%ymm12,%ymm8,%ymm0 + cmp $0x0040,%rax + jl .Lxorpart8 + vpxor 0x0020(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0020(%rsi) + vperm2i128 $0x31,%ymm12,%ymm8,%ymm12 + + vmovdqa 0x40(%rsp),%ymm1 + vperm2i128 $0x20,%ymm6,%ymm1,%ymm0 + cmp $0x0060,%rax + jl .Lxorpart8 + vpxor 0x0040(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0040(%rsi) + vperm2i128 $0x31,%ymm6,%ymm1,%ymm6 + + vperm2i128 $0x20,%ymm14,%ymm10,%ymm0 + 
cmp $0x0080,%rax + jl .Lxorpart8 + vpxor 0x0060(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0060(%rsi) + vperm2i128 $0x31,%ymm14,%ymm10,%ymm14 + + vmovdqa 0x20(%rsp),%ymm1 + vperm2i128 $0x20,%ymm5,%ymm1,%ymm0 + cmp $0x00a0,%rax + jl .Lxorpart8 + vpxor 0x0080(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0080(%rsi) + vperm2i128 $0x31,%ymm5,%ymm1,%ymm5 + + vperm2i128 $0x20,%ymm13,%ymm9,%ymm0 + cmp $0x00c0,%rax + jl .Lxorpart8 + vpxor 0x00a0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x00a0(%rsi) + vperm2i128 $0x31,%ymm13,%ymm9,%ymm13 + + vmovdqa 0x60(%rsp),%ymm1 + vperm2i128 $0x20,%ymm7,%ymm1,%ymm0 + cmp $0x00e0,%rax + jl .Lxorpart8 + vpxor 0x00c0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x00c0(%rsi) + vperm2i128 $0x31,%ymm7,%ymm1,%ymm7 + + vperm2i128 $0x20,%ymm15,%ymm11,%ymm0 + cmp $0x0100,%rax + jl .Lxorpart8 + vpxor 0x00e0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x00e0(%rsi) + vperm2i128 $0x31,%ymm15,%ymm11,%ymm15 + + # xor remaining blocks, write to output + vmovdqa %ymm4,%ymm0 + cmp $0x0120,%rax + jl .Lxorpart8 + vpxor 0x0100(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0100(%rsi) + + vmovdqa %ymm12,%ymm0 + cmp $0x0140,%rax + jl .Lxorpart8 + vpxor 0x0120(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0120(%rsi) + + vmovdqa %ymm6,%ymm0 + cmp $0x0160,%rax + jl .Lxorpart8 + vpxor 0x0140(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0140(%rsi) + + vmovdqa %ymm14,%ymm0 + cmp $0x0180,%rax + jl .Lxorpart8 + vpxor 0x0160(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0160(%rsi) + + vmovdqa %ymm5,%ymm0 + cmp $0x01a0,%rax + jl .Lxorpart8 + vpxor 0x0180(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x0180(%rsi) + + vmovdqa %ymm13,%ymm0 + cmp $0x01c0,%rax + jl .Lxorpart8 + vpxor 0x01a0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x01a0(%rsi) + + vmovdqa %ymm7,%ymm0 + cmp $0x01e0,%rax + jl .Lxorpart8 + vpxor 0x01c0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x01c0(%rsi) + + vmovdqa %ymm15,%ymm0 + cmp $0x0200,%rax + jl .Lxorpart8 + vpxor 0x01e0(%rdx),%ymm0,%ymm0 + vmovdqu %ymm0,0x01e0(%rsi) + +.Ldone8: + vzeroupper + lea -8(%r10),%rsp + ret + +.Lxorpart8: + # xor remaining bytes from 
partial register into output + mov %rax,%r9 + and $0x1f,%r9 + jz .Ldone8 + and $~0x1f,%rax + + mov %rsi,%r11 + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + vpxor 0x00(%rsp),%ymm0,%ymm0 + vmovdqa %ymm0,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + jmp .Ldone8 + +ENDPROC(chacha_8block_xor_avx2) diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S new file mode 100644 index 000000000000..848f9c75fd4f --- /dev/null +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * ChaCha 256-bit cipher algorithm, x64 AVX-512VL functions + * + * Copyright (C) 2018 Martin Willi + */ + +#include + +.section .rodata.cst32.CTR2BL, "aM", @progbits, 32 +.align 32 +CTR2BL: .octa 0x00000000000000000000000000000000 + .octa 0x00000000000000000000000000000001 + +.section .rodata.cst32.CTR4BL, "aM", @progbits, 32 +.align 32 +CTR4BL: .octa 0x00000000000000000000000000000002 + .octa 0x00000000000000000000000000000003 + +.section .rodata.cst32.CTR8BL, "aM", @progbits, 32 +.align 32 +CTR8BL: .octa 0x00000003000000020000000100000000 + .octa 0x00000007000000060000000500000004 + +.text + +ENTRY(chacha_2block_xor_avx512vl) + # %rdi: Input state matrix, s + # %rsi: up to 2 data blocks output, o + # %rdx: up to 2 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts two ChaCha blocks by loading the state + # matrix twice across four AVX registers. It performs matrix operations + # on four words in each matrix in parallel, but requires shuffling to + # rearrange the words after each round. 
+ + vzeroupper + + # x0..3[0-2] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + + vmovdqa %ymm0,%ymm8 + vmovdqa %ymm1,%ymm9 + vmovdqa %ymm2,%ymm10 + vmovdqa %ymm3,%ymm11 + +.Ldoubleround: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + + sub $2,%r8d + jnz .Ldoubleround + + # o0 = i0 ^ (x0 + s0) + vpaddd %ymm8,%ymm0,%ymm7 + cmp $0x10,%rcx + jl .Lxorpart2 + vpxord 0x00(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x00(%rsi) + vextracti128 $1,%ymm7,%xmm0 + # o1 = i1 ^ (x1 + s1) + vpaddd %ymm9,%ymm1,%ymm7 + cmp $0x20,%rcx + jl .Lxorpart2 + vpxord 
0x10(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x10(%rsi) + vextracti128 $1,%ymm7,%xmm1 + # o2 = i2 ^ (x2 + s2) + vpaddd %ymm10,%ymm2,%ymm7 + cmp $0x30,%rcx + jl .Lxorpart2 + vpxord 0x20(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x20(%rsi) + vextracti128 $1,%ymm7,%xmm2 + # o3 = i3 ^ (x3 + s3) + vpaddd %ymm11,%ymm3,%ymm7 + cmp $0x40,%rcx + jl .Lxorpart2 + vpxord 0x30(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x30(%rsi) + vextracti128 $1,%ymm7,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm7 + cmp $0x50,%rcx + jl .Lxorpart2 + vpxord 0x40(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x40(%rsi) + + vmovdqa %xmm1,%xmm7 + cmp $0x60,%rcx + jl .Lxorpart2 + vpxord 0x50(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x50(%rsi) + + vmovdqa %xmm2,%xmm7 + cmp $0x70,%rcx + jl .Lxorpart2 + vpxord 0x60(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x60(%rsi) + + vmovdqa %xmm3,%xmm7 + cmp $0x80,%rcx + jl .Lxorpart2 + vpxord 0x70(%rdx),%xmm7,%xmm6 + vmovdqu %xmm6,0x70(%rsi) + +.Ldone2: + vzeroupper + ret + +.Lxorpart2: + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0xf,%rcx + jz .Ldone2 + mov %rax,%r9 + and $~0xf,%r9 + + mov $1,%rax + shld %cl,%rax,%rax + sub $1,%rax + kmovq %rax,%k1 + + vmovdqu8 (%rdx,%r9),%xmm1{%k1}{z} + vpxord %xmm7,%xmm1,%xmm1 + vmovdqu8 %xmm1,(%rsi,%r9){%k1} + + jmp .Ldone2 + +ENDPROC(chacha_2block_xor_avx512vl) + +ENTRY(chacha_4block_xor_avx512vl) + # %rdi: Input state matrix, s + # %rsi: up to 4 data blocks output, o + # %rdx: up to 4 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts four ChaCha blocks by loading the state + # matrix four times across eight AVX registers. It performs matrix + # operations on four words in two matrices in parallel, sequentially + # to the operations on the four words of the other two matrices. The + # required word shuffling has a rather high latency, we can do the + # arithmetic on two matrix-pairs without much slowdown.
+ + vzeroupper + + # x0..3[0-4] = s0..3 + vbroadcasti128 0x00(%rdi),%ymm0 + vbroadcasti128 0x10(%rdi),%ymm1 + vbroadcasti128 0x20(%rdi),%ymm2 + vbroadcasti128 0x30(%rdi),%ymm3 + + vmovdqa %ymm0,%ymm4 + vmovdqa %ymm1,%ymm5 + vmovdqa %ymm2,%ymm6 + vmovdqa %ymm3,%ymm7 + + vpaddd CTR2BL(%rip),%ymm3,%ymm3 + vpaddd CTR4BL(%rip),%ymm7,%ymm7 + + vmovdqa %ymm0,%ymm11 + vmovdqa %ymm1,%ymm12 + vmovdqa %ymm2,%ymm13 + vmovdqa %ymm3,%ymm14 + vmovdqa %ymm7,%ymm15 + +.Ldoubleround4: + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $16,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm1,%ymm1 + vpshufd $0x39,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm3,%ymm3 + vpshufd $0x93,%ymm7,%ymm7 + + # x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $16,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + + # x0 += x1, x3 = rotl32(x3 ^ 
x0, 8) + vpaddd %ymm1,%ymm0,%ymm0 + vpxord %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + + vpaddd %ymm5,%ymm4,%ymm4 + vpxord %ymm4,%ymm7,%ymm7 + vprold $8,%ymm7,%ymm7 + + # x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vpaddd %ymm3,%ymm2,%ymm2 + vpxord %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + + vpaddd %ymm7,%ymm6,%ymm6 + vpxord %ymm6,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + + # x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vpshufd $0x93,%ymm1,%ymm1 + vpshufd $0x93,%ymm5,%ymm5 + # x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vpshufd $0x4e,%ymm2,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vpshufd $0x39,%ymm3,%ymm3 + vpshufd $0x39,%ymm7,%ymm7 + + sub $2,%r8d + jnz .Ldoubleround4 + + # o0 = i0 ^ (x0 + s0), first block + vpaddd %ymm11,%ymm0,%ymm10 + cmp $0x10,%rcx + jl .Lxorpart4 + vpxord 0x00(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x00(%rsi) + vextracti128 $1,%ymm10,%xmm0 + # o1 = i1 ^ (x1 + s1), first block + vpaddd %ymm12,%ymm1,%ymm10 + cmp $0x20,%rcx + jl .Lxorpart4 + vpxord 0x10(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x10(%rsi) + vextracti128 $1,%ymm10,%xmm1 + # o2 = i2 ^ (x2 + s2), first block + vpaddd %ymm13,%ymm2,%ymm10 + cmp $0x30,%rcx + jl .Lxorpart4 + vpxord 0x20(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x20(%rsi) + vextracti128 $1,%ymm10,%xmm2 + # o3 = i3 ^ (x3 + s3), first block + vpaddd %ymm14,%ymm3,%ymm10 + cmp $0x40,%rcx + jl .Lxorpart4 + vpxord 0x30(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x30(%rsi) + vextracti128 $1,%ymm10,%xmm3 + + # xor and write second block + vmovdqa %xmm0,%xmm10 + cmp $0x50,%rcx + jl .Lxorpart4 + vpxord 0x40(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x40(%rsi) + + vmovdqa %xmm1,%xmm10 + cmp $0x60,%rcx + jl .Lxorpart4 + vpxord 0x50(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x50(%rsi) + + vmovdqa %xmm2,%xmm10 + cmp $0x70,%rcx + jl .Lxorpart4 + vpxord 0x60(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x60(%rsi) + + vmovdqa %xmm3,%xmm10 + cmp $0x80,%rcx + jl .Lxorpart4 + vpxord 0x70(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x70(%rsi) + + # o0 = i0 ^ (x0 + s0), third block + 
vpaddd %ymm11,%ymm4,%ymm10 + cmp $0x90,%rcx + jl .Lxorpart4 + vpxord 0x80(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x80(%rsi) + vextracti128 $1,%ymm10,%xmm4 + # o1 = i1 ^ (x1 + s1), third block + vpaddd %ymm12,%ymm5,%ymm10 + cmp $0xa0,%rcx + jl .Lxorpart4 + vpxord 0x90(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0x90(%rsi) + vextracti128 $1,%ymm10,%xmm5 + # o2 = i2 ^ (x2 + s2), third block + vpaddd %ymm13,%ymm6,%ymm10 + cmp $0xb0,%rcx + jl .Lxorpart4 + vpxord 0xa0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xa0(%rsi) + vextracti128 $1,%ymm10,%xmm6 + # o3 = i3 ^ (x3 + s3), third block + vpaddd %ymm15,%ymm7,%ymm10 + cmp $0xc0,%rcx + jl .Lxorpart4 + vpxord 0xb0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xb0(%rsi) + vextracti128 $1,%ymm10,%xmm7 + + # xor and write fourth block + vmovdqa %xmm4,%xmm10 + cmp $0xd0,%rcx + jl .Lxorpart4 + vpxord 0xc0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xc0(%rsi) + + vmovdqa %xmm5,%xmm10 + cmp $0xe0,%rcx + jl .Lxorpart4 + vpxord 0xd0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xd0(%rsi) + + vmovdqa %xmm6,%xmm10 + cmp $0xf0,%rcx + jl .Lxorpart4 + vpxord 0xe0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xe0(%rsi) + + vmovdqa %xmm7,%xmm10 + cmp $0x100,%rcx + jl .Lxorpart4 + vpxord 0xf0(%rdx),%xmm10,%xmm9 + vmovdqu %xmm9,0xf0(%rsi) + +.Ldone4: + vzeroupper + ret + +.Lxorpart4: + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0xf,%rcx + jz .Ldone4 + mov %rax,%r9 + and $~0xf,%r9 + + mov $1,%rax + shld %cl,%rax,%rax + sub $1,%rax + kmovq %rax,%k1 + + vmovdqu8 (%rdx,%r9),%xmm1{%k1}{z} + vpxord %xmm10,%xmm1,%xmm1 + vmovdqu8 %xmm1,(%rsi,%r9){%k1} + + jmp .Ldone4 + +ENDPROC(chacha_4block_xor_avx512vl) + +ENTRY(chacha_8block_xor_avx512vl) + # %rdi: Input state matrix, s + # %rsi: up to 8 data blocks output, o + # %rdx: up to 8 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts eight consecutive ChaCha blocks by loading + # the state matrix in AVX registers eight times.
Compared to AVX2, this + # mostly benefits from the new rotate instructions in VL and the + # additional registers. + + vzeroupper + + # x0..15[0-7] = s[0..15] + vpbroadcastd 0x00(%rdi),%ymm0 + vpbroadcastd 0x04(%rdi),%ymm1 + vpbroadcastd 0x08(%rdi),%ymm2 + vpbroadcastd 0x0c(%rdi),%ymm3 + vpbroadcastd 0x10(%rdi),%ymm4 + vpbroadcastd 0x14(%rdi),%ymm5 + vpbroadcastd 0x18(%rdi),%ymm6 + vpbroadcastd 0x1c(%rdi),%ymm7 + vpbroadcastd 0x20(%rdi),%ymm8 + vpbroadcastd 0x24(%rdi),%ymm9 + vpbroadcastd 0x28(%rdi),%ymm10 + vpbroadcastd 0x2c(%rdi),%ymm11 + vpbroadcastd 0x30(%rdi),%ymm12 + vpbroadcastd 0x34(%rdi),%ymm13 + vpbroadcastd 0x38(%rdi),%ymm14 + vpbroadcastd 0x3c(%rdi),%ymm15 + + # x12 += counter values 0-3 + vpaddd CTR8BL(%rip),%ymm12,%ymm12 + + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm1,%ymm17 + vmovdqa64 %ymm2,%ymm18 + vmovdqa64 %ymm3,%ymm19 + vmovdqa64 %ymm4,%ymm20 + vmovdqa64 %ymm5,%ymm21 + vmovdqa64 %ymm6,%ymm22 + vmovdqa64 %ymm7,%ymm23 + vmovdqa64 %ymm8,%ymm24 + vmovdqa64 %ymm9,%ymm25 + vmovdqa64 %ymm10,%ymm26 + vmovdqa64 %ymm11,%ymm27 + vmovdqa64 %ymm12,%ymm28 + vmovdqa64 %ymm13,%ymm29 + vmovdqa64 %ymm14,%ymm30 + vmovdqa64 %ymm15,%ymm31 + +.Ldoubleround8: + # x0 += x4, x12 = rotl32(x12 ^ x0, 16) + vpaddd %ymm0,%ymm4,%ymm0 + vpxord %ymm0,%ymm12,%ymm12 + vprold $16,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 16) + vpaddd %ymm1,%ymm5,%ymm1 + vpxord %ymm1,%ymm13,%ymm13 + vprold $16,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 16) + vpaddd %ymm2,%ymm6,%ymm2 + vpxord %ymm2,%ymm14,%ymm14 + vprold $16,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vpaddd %ymm3,%ymm7,%ymm3 + vpxord %ymm3,%ymm15,%ymm15 + vprold $16,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 12) + vpaddd %ymm12,%ymm8,%ymm8 + vpxord %ymm8,%ymm4,%ymm4 + vprold $12,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 12) + vpaddd %ymm13,%ymm9,%ymm9 + vpxord %ymm9,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 12) + vpaddd %ymm14,%ymm10,%ymm10 + vpxord 
%ymm10,%ymm6,%ymm6 + vprold $12,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vpaddd %ymm15,%ymm11,%ymm11 + vpxord %ymm11,%ymm7,%ymm7 + vprold $12,%ymm7,%ymm7 + + # x0 += x4, x12 = rotl32(x12 ^ x0, 8) + vpaddd %ymm0,%ymm4,%ymm0 + vpxord %ymm0,%ymm12,%ymm12 + vprold $8,%ymm12,%ymm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 8) + vpaddd %ymm1,%ymm5,%ymm1 + vpxord %ymm1,%ymm13,%ymm13 + vprold $8,%ymm13,%ymm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 8) + vpaddd %ymm2,%ymm6,%ymm2 + vpxord %ymm2,%ymm14,%ymm14 + vprold $8,%ymm14,%ymm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vpaddd %ymm3,%ymm7,%ymm3 + vpxord %ymm3,%ymm15,%ymm15 + vprold $8,%ymm15,%ymm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 7) + vpaddd %ymm12,%ymm8,%ymm8 + vpxord %ymm8,%ymm4,%ymm4 + vprold $7,%ymm4,%ymm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 7) + vpaddd %ymm13,%ymm9,%ymm9 + vpxord %ymm9,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 7) + vpaddd %ymm14,%ymm10,%ymm10 + vpxord %ymm10,%ymm6,%ymm6 + vprold $7,%ymm6,%ymm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vpaddd %ymm15,%ymm11,%ymm11 + vpxord %ymm11,%ymm7,%ymm7 + vprold $7,%ymm7,%ymm7 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 16) + vpaddd %ymm0,%ymm5,%ymm0 + vpxord %ymm0,%ymm15,%ymm15 + vprold $16,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 16) + vpaddd %ymm1,%ymm6,%ymm1 + vpxord %ymm1,%ymm12,%ymm12 + vprold $16,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 16) + vpaddd %ymm2,%ymm7,%ymm2 + vpxord %ymm2,%ymm13,%ymm13 + vprold $16,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vpaddd %ymm3,%ymm4,%ymm3 + vpxord %ymm3,%ymm14,%ymm14 + vprold $16,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 12) + vpaddd %ymm15,%ymm10,%ymm10 + vpxord %ymm10,%ymm5,%ymm5 + vprold $12,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 12) + vpaddd %ymm12,%ymm11,%ymm11 + vpxord %ymm11,%ymm6,%ymm6 + vprold $12,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 12) + vpaddd %ymm13,%ymm8,%ymm8 + vpxord %ymm8,%ymm7,%ymm7 + vprold 
$12,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vpaddd %ymm14,%ymm9,%ymm9 + vpxord %ymm9,%ymm4,%ymm4 + vprold $12,%ymm4,%ymm4 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 8) + vpaddd %ymm0,%ymm5,%ymm0 + vpxord %ymm0,%ymm15,%ymm15 + vprold $8,%ymm15,%ymm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 8) + vpaddd %ymm1,%ymm6,%ymm1 + vpxord %ymm1,%ymm12,%ymm12 + vprold $8,%ymm12,%ymm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 8) + vpaddd %ymm2,%ymm7,%ymm2 + vpxord %ymm2,%ymm13,%ymm13 + vprold $8,%ymm13,%ymm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vpaddd %ymm3,%ymm4,%ymm3 + vpxord %ymm3,%ymm14,%ymm14 + vprold $8,%ymm14,%ymm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 7) + vpaddd %ymm15,%ymm10,%ymm10 + vpxord %ymm10,%ymm5,%ymm5 + vprold $7,%ymm5,%ymm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 7) + vpaddd %ymm12,%ymm11,%ymm11 + vpxord %ymm11,%ymm6,%ymm6 + vprold $7,%ymm6,%ymm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 7) + vpaddd %ymm13,%ymm8,%ymm8 + vpxord %ymm8,%ymm7,%ymm7 + vprold $7,%ymm7,%ymm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vpaddd %ymm14,%ymm9,%ymm9 + vpxord %ymm9,%ymm4,%ymm4 + vprold $7,%ymm4,%ymm4 + + sub $2,%r8d + jnz .Ldoubleround8 + + # x0..15[0-3] += s[0..15] + vpaddd %ymm16,%ymm0,%ymm0 + vpaddd %ymm17,%ymm1,%ymm1 + vpaddd %ymm18,%ymm2,%ymm2 + vpaddd %ymm19,%ymm3,%ymm3 + vpaddd %ymm20,%ymm4,%ymm4 + vpaddd %ymm21,%ymm5,%ymm5 + vpaddd %ymm22,%ymm6,%ymm6 + vpaddd %ymm23,%ymm7,%ymm7 + vpaddd %ymm24,%ymm8,%ymm8 + vpaddd %ymm25,%ymm9,%ymm9 + vpaddd %ymm26,%ymm10,%ymm10 + vpaddd %ymm27,%ymm11,%ymm11 + vpaddd %ymm28,%ymm12,%ymm12 + vpaddd %ymm29,%ymm13,%ymm13 + vpaddd %ymm30,%ymm14,%ymm14 + vpaddd %ymm31,%ymm15,%ymm15 + + # interleave 32-bit words in state n, n+1 + vpunpckldq %ymm1,%ymm0,%ymm16 + vpunpckhdq %ymm1,%ymm0,%ymm17 + vpunpckldq %ymm3,%ymm2,%ymm18 + vpunpckhdq %ymm3,%ymm2,%ymm19 + vpunpckldq %ymm5,%ymm4,%ymm20 + vpunpckhdq %ymm5,%ymm4,%ymm21 + vpunpckldq %ymm7,%ymm6,%ymm22 + vpunpckhdq %ymm7,%ymm6,%ymm23 + vpunpckldq %ymm9,%ymm8,%ymm24 + vpunpckhdq 
%ymm9,%ymm8,%ymm25 + vpunpckldq %ymm11,%ymm10,%ymm26 + vpunpckhdq %ymm11,%ymm10,%ymm27 + vpunpckldq %ymm13,%ymm12,%ymm28 + vpunpckhdq %ymm13,%ymm12,%ymm29 + vpunpckldq %ymm15,%ymm14,%ymm30 + vpunpckhdq %ymm15,%ymm14,%ymm31 + + # interleave 64-bit words in state n, n+2 + vpunpcklqdq %ymm18,%ymm16,%ymm0 + vpunpcklqdq %ymm19,%ymm17,%ymm1 + vpunpckhqdq %ymm18,%ymm16,%ymm2 + vpunpckhqdq %ymm19,%ymm17,%ymm3 + vpunpcklqdq %ymm22,%ymm20,%ymm4 + vpunpcklqdq %ymm23,%ymm21,%ymm5 + vpunpckhqdq %ymm22,%ymm20,%ymm6 + vpunpckhqdq %ymm23,%ymm21,%ymm7 + vpunpcklqdq %ymm26,%ymm24,%ymm8 + vpunpcklqdq %ymm27,%ymm25,%ymm9 + vpunpckhqdq %ymm26,%ymm24,%ymm10 + vpunpckhqdq %ymm27,%ymm25,%ymm11 + vpunpcklqdq %ymm30,%ymm28,%ymm12 + vpunpcklqdq %ymm31,%ymm29,%ymm13 + vpunpckhqdq %ymm30,%ymm28,%ymm14 + vpunpckhqdq %ymm31,%ymm29,%ymm15 + + # interleave 128-bit words in state n, n+4 + # xor/write first four blocks + vmovdqa64 %ymm0,%ymm16 + vperm2i128 $0x20,%ymm4,%ymm0,%ymm0 + cmp $0x0020,%rcx + jl .Lxorpart8 + vpxord 0x0000(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0000(%rsi) + vmovdqa64 %ymm16,%ymm0 + vperm2i128 $0x31,%ymm4,%ymm0,%ymm4 + + vperm2i128 $0x20,%ymm12,%ymm8,%ymm0 + cmp $0x0040,%rcx + jl .Lxorpart8 + vpxord 0x0020(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0020(%rsi) + vperm2i128 $0x31,%ymm12,%ymm8,%ymm12 + + vperm2i128 $0x20,%ymm6,%ymm2,%ymm0 + cmp $0x0060,%rcx + jl .Lxorpart8 + vpxord 0x0040(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0040(%rsi) + vperm2i128 $0x31,%ymm6,%ymm2,%ymm6 + + vperm2i128 $0x20,%ymm14,%ymm10,%ymm0 + cmp $0x0080,%rcx + jl .Lxorpart8 + vpxord 0x0060(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0060(%rsi) + vperm2i128 $0x31,%ymm14,%ymm10,%ymm14 + + vperm2i128 $0x20,%ymm5,%ymm1,%ymm0 + cmp $0x00a0,%rcx + jl .Lxorpart8 + vpxord 0x0080(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0080(%rsi) + vperm2i128 $0x31,%ymm5,%ymm1,%ymm5 + + vperm2i128 $0x20,%ymm13,%ymm9,%ymm0 + cmp $0x00c0,%rcx + jl .Lxorpart8 + vpxord 0x00a0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x00a0(%rsi) + vperm2i128 
$0x31,%ymm13,%ymm9,%ymm13 + + vperm2i128 $0x20,%ymm7,%ymm3,%ymm0 + cmp $0x00e0,%rcx + jl .Lxorpart8 + vpxord 0x00c0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x00c0(%rsi) + vperm2i128 $0x31,%ymm7,%ymm3,%ymm7 + + vperm2i128 $0x20,%ymm15,%ymm11,%ymm0 + cmp $0x0100,%rcx + jl .Lxorpart8 + vpxord 0x00e0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x00e0(%rsi) + vperm2i128 $0x31,%ymm15,%ymm11,%ymm15 + + # xor remaining blocks, write to output + vmovdqa64 %ymm4,%ymm0 + cmp $0x0120,%rcx + jl .Lxorpart8 + vpxord 0x0100(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0100(%rsi) + + vmovdqa64 %ymm12,%ymm0 + cmp $0x0140,%rcx + jl .Lxorpart8 + vpxord 0x0120(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0120(%rsi) + + vmovdqa64 %ymm6,%ymm0 + cmp $0x0160,%rcx + jl .Lxorpart8 + vpxord 0x0140(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0140(%rsi) + + vmovdqa64 %ymm14,%ymm0 + cmp $0x0180,%rcx + jl .Lxorpart8 + vpxord 0x0160(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0160(%rsi) + + vmovdqa64 %ymm5,%ymm0 + cmp $0x01a0,%rcx + jl .Lxorpart8 + vpxord 0x0180(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x0180(%rsi) + + vmovdqa64 %ymm13,%ymm0 + cmp $0x01c0,%rcx + jl .Lxorpart8 + vpxord 0x01a0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x01a0(%rsi) + + vmovdqa64 %ymm7,%ymm0 + cmp $0x01e0,%rcx + jl .Lxorpart8 + vpxord 0x01c0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x01c0(%rsi) + + vmovdqa64 %ymm15,%ymm0 + cmp $0x0200,%rcx + jl .Lxorpart8 + vpxord 0x01e0(%rdx),%ymm0,%ymm0 + vmovdqu64 %ymm0,0x01e0(%rsi) + +.Ldone8: + vzeroupper + ret + +.Lxorpart8: + # xor remaining bytes from partial register into output + mov %rcx,%rax + and $0x1f,%rcx + jz .Ldone8 + mov %rax,%r9 + and $~0x1f,%r9 + + mov $1,%rax + shld %cl,%rax,%rax + sub $1,%rax + kmovq %rax,%k1 + + vmovdqu8 (%rdx,%r9),%ymm1{%k1}{z} + vpxord %ymm0,%ymm1,%ymm1 + vmovdqu8 %ymm1,(%rsi,%r9){%k1} + + jmp .Ldone8 + +ENDPROC(chacha_8block_xor_avx512vl) diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S similarity index 76% rename from 
arch/x86/crypto/chacha20-ssse3-x86_64.S rename to arch/x86/crypto/chacha-ssse3-x86_64.S index 512a2b500fd1..c05a7a963dc3 100644 --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha-ssse3-x86_64.S @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions + * ChaCha 256-bit cipher algorithm, x64 SSSE3 functions * * Copyright (C) 2015 Martin Willi * @@ -10,6 +10,7 @@ */ #include +#include .section .rodata.cst16.ROT8, "aM", @progbits, 16 .align 16 @@ -23,35 +24,25 @@ CTRINC: .octa 0x00000003000000020000000100000000 .text -ENTRY(chacha20_block_xor_ssse3) - # %rdi: Input state matrix, s - # %rsi: 1 data block output, o - # %rdx: 1 data block input, i - - # This function encrypts one ChaCha20 block by loading the state matrix - # in four SSE registers. It performs matrix operation on four words in - # parallel, but requireds shuffling to rearrange the words after each - # round. 8/16-bit word rotation is done with the slightly better - # performing SSSE3 byte shuffling, 7/12-bit word rotation uses - # traditional shift+OR. - - # x0..3 = s0..3 - movdqa 0x00(%rdi),%xmm0 - movdqa 0x10(%rdi),%xmm1 - movdqa 0x20(%rdi),%xmm2 - movdqa 0x30(%rdi),%xmm3 - movdqa %xmm0,%xmm8 - movdqa %xmm1,%xmm9 - movdqa %xmm2,%xmm10 - movdqa %xmm3,%xmm11 +/* + * chacha_permute - permute one block + * + * Permute one 64-byte block where the state matrix is in %xmm0-%xmm3. This + * function performs matrix operations on four words in parallel, but requires + * shuffling to rearrange the words after each round. 8/16-bit word rotation is + * done with the slightly better performing SSSE3 byte shuffling, 7/12-bit word + * rotation uses traditional shift+OR. + * + * The round count is given in %r8d. 
+ * + * Clobbers: %r8d, %xmm4-%xmm7 + */ +chacha_permute: movdqa ROT8(%rip),%xmm4 movdqa ROT16(%rip),%xmm5 - mov $10,%ecx - .Ldoubleround: - # x0 += x1, x3 = rotl32(x3 ^ x0, 16) paddd %xmm1,%xmm0 pxor %xmm0,%xmm3 @@ -118,39 +109,129 @@ ENTRY(chacha20_block_xor_ssse3) # x3 = shuffle32(x3, MASK(0, 3, 2, 1)) pshufd $0x39,%xmm3,%xmm3 - dec %ecx + sub $2,%r8d jnz .Ldoubleround + ret +ENDPROC(chacha_permute) + +ENTRY(chacha_block_xor_ssse3) + # %rdi: Input state matrix, s + # %rsi: up to 1 data block output, o + # %rdx: up to 1 data block input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + FRAME_BEGIN + + # x0..3 = s0..3 + movdqa 0x00(%rdi),%xmm0 + movdqa 0x10(%rdi),%xmm1 + movdqa 0x20(%rdi),%xmm2 + movdqa 0x30(%rdi),%xmm3 + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + movdqa %xmm2,%xmm10 + movdqa %xmm3,%xmm11 + + mov %rcx,%rax + call chacha_permute + # o0 = i0 ^ (x0 + s0) - movdqu 0x00(%rdx),%xmm4 paddd %xmm8,%xmm0 + cmp $0x10,%rax + jl .Lxorpart + movdqu 0x00(%rdx),%xmm4 pxor %xmm4,%xmm0 movdqu %xmm0,0x00(%rsi) # o1 = i1 ^ (x1 + s1) - movdqu 0x10(%rdx),%xmm5 paddd %xmm9,%xmm1 - pxor %xmm5,%xmm1 - movdqu %xmm1,0x10(%rsi) + movdqa %xmm1,%xmm0 + cmp $0x20,%rax + jl .Lxorpart + movdqu 0x10(%rdx),%xmm0 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x10(%rsi) # o2 = i2 ^ (x2 + s2) - movdqu 0x20(%rdx),%xmm6 paddd %xmm10,%xmm2 - pxor %xmm6,%xmm2 - movdqu %xmm2,0x20(%rsi) + movdqa %xmm2,%xmm0 + cmp $0x30,%rax + jl .Lxorpart + movdqu 0x20(%rdx),%xmm0 + pxor %xmm2,%xmm0 + movdqu %xmm0,0x20(%rsi) # o3 = i3 ^ (x3 + s3) - movdqu 0x30(%rdx),%xmm7 paddd %xmm11,%xmm3 - pxor %xmm7,%xmm3 - movdqu %xmm3,0x30(%rsi) + movdqa %xmm3,%xmm0 + cmp $0x40,%rax + jl .Lxorpart + movdqu 0x30(%rdx),%xmm0 + pxor %xmm3,%xmm0 + movdqu %xmm0,0x30(%rsi) +.Ldone: + FRAME_END ret -ENDPROC(chacha20_block_xor_ssse3) -ENTRY(chacha20_4block_xor_ssse3) +.Lxorpart: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone + and $~0x0f,%rax + + mov %rsi,%r11 + + lea 
8(%rsp),%r10 + sub $0x10,%rsp + and $~31,%rsp + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + pxor 0x00(%rsp),%xmm0 + movdqa %xmm0,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + lea -8(%r10),%rsp + jmp .Ldone + +ENDPROC(chacha_block_xor_ssse3) + +ENTRY(hchacha_block_ssse3) # %rdi: Input state matrix, s - # %rsi: 4 data blocks output, o - # %rdx: 4 data blocks input, i + # %rsi: output (8 32-bit words) + # %edx: nrounds + FRAME_BEGIN - # This function encrypts four consecutive ChaCha20 blocks by loading the + movdqa 0x00(%rdi),%xmm0 + movdqa 0x10(%rdi),%xmm1 + movdqa 0x20(%rdi),%xmm2 + movdqa 0x30(%rdi),%xmm3 + + mov %edx,%r8d + call chacha_permute + + movdqu %xmm0,0x00(%rsi) + movdqu %xmm3,0x10(%rsi) + + FRAME_END + ret +ENDPROC(hchacha_block_ssse3) + +ENTRY(chacha_4block_xor_ssse3) + # %rdi: Input state matrix, s + # %rsi: up to 4 data blocks output, o + # %rdx: up to 4 data blocks input, i + # %rcx: input/output length in bytes + # %r8d: nrounds + + # This function encrypts four consecutive ChaCha blocks by loading the # the state matrix in SSE registers four times. As we need some scratch # registers, we save the first four registers on the stack. 
The # algorithm performs each operation on the corresponding word of each @@ -163,6 +244,7 @@ ENTRY(chacha20_4block_xor_ssse3) lea 8(%rsp),%r10 sub $0x80,%rsp and $~63,%rsp + mov %rcx,%rax # x0..15[0-3] = s0..3[0..3] movq 0x00(%rdi),%xmm1 @@ -202,8 +284,6 @@ ENTRY(chacha20_4block_xor_ssse3) # x12 += counter values 0-3 paddd %xmm1,%xmm12 - mov $10,%ecx - .Ldoubleround4: # x0 += x4, x12 = rotl32(x12 ^ x0, 16) movdqa 0x00(%rsp),%xmm0 @@ -421,7 +501,7 @@ ENTRY(chacha20_4block_xor_ssse3) psrld $25,%xmm4 por %xmm0,%xmm4 - dec %ecx + sub $2,%r8d jnz .Ldoubleround4 # x0[0-3] += s0[0] @@ -573,58 +653,143 @@ ENTRY(chacha20_4block_xor_ssse3) # xor with corresponding input, write to output movdqa 0x00(%rsp),%xmm0 + cmp $0x10,%rax + jl .Lxorpart4 movdqu 0x00(%rdx),%xmm1 pxor %xmm1,%xmm0 movdqu %xmm0,0x00(%rsi) - movdqa 0x10(%rsp),%xmm0 - movdqu 0x80(%rdx),%xmm1 + + movdqu %xmm4,%xmm0 + cmp $0x20,%rax + jl .Lxorpart4 + movdqu 0x10(%rdx),%xmm1 pxor %xmm1,%xmm0 - movdqu %xmm0,0x80(%rsi) + movdqu %xmm0,0x10(%rsi) + + movdqu %xmm8,%xmm0 + cmp $0x30,%rax + jl .Lxorpart4 + movdqu 0x20(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x20(%rsi) + + movdqu %xmm12,%xmm0 + cmp $0x40,%rax + jl .Lxorpart4 + movdqu 0x30(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x30(%rsi) + movdqa 0x20(%rsp),%xmm0 + cmp $0x50,%rax + jl .Lxorpart4 movdqu 0x40(%rdx),%xmm1 pxor %xmm1,%xmm0 movdqu %xmm0,0x40(%rsi) + + movdqu %xmm6,%xmm0 + cmp $0x60,%rax + jl .Lxorpart4 + movdqu 0x50(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x50(%rsi) + + movdqu %xmm10,%xmm0 + cmp $0x70,%rax + jl .Lxorpart4 + movdqu 0x60(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x60(%rsi) + + movdqu %xmm14,%xmm0 + cmp $0x80,%rax + jl .Lxorpart4 + movdqu 0x70(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x70(%rsi) + + movdqa 0x10(%rsp),%xmm0 + cmp $0x90,%rax + jl .Lxorpart4 + movdqu 0x80(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x80(%rsi) + + movdqu %xmm5,%xmm0 + cmp $0xa0,%rax + jl .Lxorpart4 + movdqu 0x90(%rdx),%xmm1 + pxor 
%xmm1,%xmm0 + movdqu %xmm0,0x90(%rsi) + + movdqu %xmm9,%xmm0 + cmp $0xb0,%rax + jl .Lxorpart4 + movdqu 0xa0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xa0(%rsi) + + movdqu %xmm13,%xmm0 + cmp $0xc0,%rax + jl .Lxorpart4 + movdqu 0xb0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xb0(%rsi) + movdqa 0x30(%rsp),%xmm0 + cmp $0xd0,%rax + jl .Lxorpart4 movdqu 0xc0(%rdx),%xmm1 pxor %xmm1,%xmm0 movdqu %xmm0,0xc0(%rsi) - movdqu 0x10(%rdx),%xmm1 - pxor %xmm1,%xmm4 - movdqu %xmm4,0x10(%rsi) - movdqu 0x90(%rdx),%xmm1 - pxor %xmm1,%xmm5 - movdqu %xmm5,0x90(%rsi) - movdqu 0x50(%rdx),%xmm1 - pxor %xmm1,%xmm6 - movdqu %xmm6,0x50(%rsi) - movdqu 0xd0(%rdx),%xmm1 - pxor %xmm1,%xmm7 - movdqu %xmm7,0xd0(%rsi) - movdqu 0x20(%rdx),%xmm1 - pxor %xmm1,%xmm8 - movdqu %xmm8,0x20(%rsi) - movdqu 0xa0(%rdx),%xmm1 - pxor %xmm1,%xmm9 - movdqu %xmm9,0xa0(%rsi) - movdqu 0x60(%rdx),%xmm1 - pxor %xmm1,%xmm10 - movdqu %xmm10,0x60(%rsi) - movdqu 0xe0(%rdx),%xmm1 - pxor %xmm1,%xmm11 - movdqu %xmm11,0xe0(%rsi) - movdqu 0x30(%rdx),%xmm1 - pxor %xmm1,%xmm12 - movdqu %xmm12,0x30(%rsi) - movdqu 0xb0(%rdx),%xmm1 - pxor %xmm1,%xmm13 - movdqu %xmm13,0xb0(%rsi) - movdqu 0x70(%rdx),%xmm1 - pxor %xmm1,%xmm14 - movdqu %xmm14,0x70(%rsi) - movdqu 0xf0(%rdx),%xmm1 - pxor %xmm1,%xmm15 - movdqu %xmm15,0xf0(%rsi) + movdqu %xmm7,%xmm0 + cmp $0xe0,%rax + jl .Lxorpart4 + movdqu 0xd0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xd0(%rsi) + + movdqu %xmm11,%xmm0 + cmp $0xf0,%rax + jl .Lxorpart4 + movdqu 0xe0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xe0(%rsi) + + movdqu %xmm15,%xmm0 + cmp $0x100,%rax + jl .Lxorpart4 + movdqu 0xf0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xf0(%rsi) + +.Ldone4: lea -8(%r10),%rsp ret -ENDPROC(chacha20_4block_xor_ssse3) + +.Lxorpart4: + # xor remaining bytes from partial register into output + mov %rax,%r9 + and $0x0f,%r9 + jz .Ldone4 + and $~0x0f,%rax + + mov %rsi,%r11 + + lea (%rdx,%rax),%rsi + mov %rsp,%rdi + mov %r9,%rcx + rep movsb + + pxor 0x00(%rsp),%xmm0 + movdqa 
%xmm0,0x00(%rsp) + + mov %rsp,%rsi + lea (%r11,%rax),%rdi + mov %r9,%rcx + rep movsb + + jmp .Ldone4 + +ENDPROC(chacha_4block_xor_ssse3) diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S deleted file mode 100644 index f3cd26f48332..000000000000 --- a/arch/x86/crypto/chacha20-avx2-x86_64.S +++ /dev/null @@ -1,448 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include - -.section .rodata.cst32.ROT8, "aM", @progbits, 32 -.align 32 -ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 - .octa 0x0e0d0c0f0a09080b0605040702010003 - -.section .rodata.cst32.ROT16, "aM", @progbits, 32 -.align 32 -ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 - .octa 0x0d0c0f0e09080b0a0504070601000302 - -.section .rodata.cst32.CTRINC, "aM", @progbits, 32 -.align 32 -CTRINC: .octa 0x00000003000000020000000100000000 - .octa 0x00000007000000060000000500000004 - -.text - -ENTRY(chacha20_8block_xor_avx2) - # %rdi: Input state matrix, s - # %rsi: 8 data blocks output, o - # %rdx: 8 data blocks input, i - - # This function encrypts eight consecutive ChaCha20 blocks by loading - # the state matrix in AVX registers eight times. As we need some - # scratch registers, we save the first four registers on the stack. The - # algorithm performs each operation on the corresponding word of each - # state matrix, hence requires no word shuffling. For final XORing step - # we transpose the matrix by interleaving 32-, 64- and then 128-bit - # words, which allows us to do XOR in AVX registers. 8/16-bit word - # rotation is done with the slightly better performing byte shuffling, - # 7/12-bit word rotation uses traditional shift+OR. 
- - vzeroupper - # 4 * 32 byte stack, 32-byte aligned - lea 8(%rsp),%r10 - and $~31, %rsp - sub $0x80, %rsp - - # x0..15[0-7] = s[0..15] - vpbroadcastd 0x00(%rdi),%ymm0 - vpbroadcastd 0x04(%rdi),%ymm1 - vpbroadcastd 0x08(%rdi),%ymm2 - vpbroadcastd 0x0c(%rdi),%ymm3 - vpbroadcastd 0x10(%rdi),%ymm4 - vpbroadcastd 0x14(%rdi),%ymm5 - vpbroadcastd 0x18(%rdi),%ymm6 - vpbroadcastd 0x1c(%rdi),%ymm7 - vpbroadcastd 0x20(%rdi),%ymm8 - vpbroadcastd 0x24(%rdi),%ymm9 - vpbroadcastd 0x28(%rdi),%ymm10 - vpbroadcastd 0x2c(%rdi),%ymm11 - vpbroadcastd 0x30(%rdi),%ymm12 - vpbroadcastd 0x34(%rdi),%ymm13 - vpbroadcastd 0x38(%rdi),%ymm14 - vpbroadcastd 0x3c(%rdi),%ymm15 - # x0..3 on stack - vmovdqa %ymm0,0x00(%rsp) - vmovdqa %ymm1,0x20(%rsp) - vmovdqa %ymm2,0x40(%rsp) - vmovdqa %ymm3,0x60(%rsp) - - vmovdqa CTRINC(%rip),%ymm1 - vmovdqa ROT8(%rip),%ymm2 - vmovdqa ROT16(%rip),%ymm3 - - # x12 += counter values 0-3 - vpaddd %ymm1,%ymm12,%ymm12 - - mov $10,%ecx - -.Ldoubleround8: - # x0 += x4, x12 = rotl32(x12 ^ x0, 16) - vpaddd 0x00(%rsp),%ymm4,%ymm0 - vmovdqa %ymm0,0x00(%rsp) - vpxor %ymm0,%ymm12,%ymm12 - vpshufb %ymm3,%ymm12,%ymm12 - # x1 += x5, x13 = rotl32(x13 ^ x1, 16) - vpaddd 0x20(%rsp),%ymm5,%ymm0 - vmovdqa %ymm0,0x20(%rsp) - vpxor %ymm0,%ymm13,%ymm13 - vpshufb %ymm3,%ymm13,%ymm13 - # x2 += x6, x14 = rotl32(x14 ^ x2, 16) - vpaddd 0x40(%rsp),%ymm6,%ymm0 - vmovdqa %ymm0,0x40(%rsp) - vpxor %ymm0,%ymm14,%ymm14 - vpshufb %ymm3,%ymm14,%ymm14 - # x3 += x7, x15 = rotl32(x15 ^ x3, 16) - vpaddd 0x60(%rsp),%ymm7,%ymm0 - vmovdqa %ymm0,0x60(%rsp) - vpxor %ymm0,%ymm15,%ymm15 - vpshufb %ymm3,%ymm15,%ymm15 - - # x8 += x12, x4 = rotl32(x4 ^ x8, 12) - vpaddd %ymm12,%ymm8,%ymm8 - vpxor %ymm8,%ymm4,%ymm4 - vpslld $12,%ymm4,%ymm0 - vpsrld $20,%ymm4,%ymm4 - vpor %ymm0,%ymm4,%ymm4 - # x9 += x13, x5 = rotl32(x5 ^ x9, 12) - vpaddd %ymm13,%ymm9,%ymm9 - vpxor %ymm9,%ymm5,%ymm5 - vpslld $12,%ymm5,%ymm0 - vpsrld $20,%ymm5,%ymm5 - vpor %ymm0,%ymm5,%ymm5 - # x10 += x14, x6 = rotl32(x6 ^ x10, 12) - vpaddd 
%ymm14,%ymm10,%ymm10 - vpxor %ymm10,%ymm6,%ymm6 - vpslld $12,%ymm6,%ymm0 - vpsrld $20,%ymm6,%ymm6 - vpor %ymm0,%ymm6,%ymm6 - # x11 += x15, x7 = rotl32(x7 ^ x11, 12) - vpaddd %ymm15,%ymm11,%ymm11 - vpxor %ymm11,%ymm7,%ymm7 - vpslld $12,%ymm7,%ymm0 - vpsrld $20,%ymm7,%ymm7 - vpor %ymm0,%ymm7,%ymm7 - - # x0 += x4, x12 = rotl32(x12 ^ x0, 8) - vpaddd 0x00(%rsp),%ymm4,%ymm0 - vmovdqa %ymm0,0x00(%rsp) - vpxor %ymm0,%ymm12,%ymm12 - vpshufb %ymm2,%ymm12,%ymm12 - # x1 += x5, x13 = rotl32(x13 ^ x1, 8) - vpaddd 0x20(%rsp),%ymm5,%ymm0 - vmovdqa %ymm0,0x20(%rsp) - vpxor %ymm0,%ymm13,%ymm13 - vpshufb %ymm2,%ymm13,%ymm13 - # x2 += x6, x14 = rotl32(x14 ^ x2, 8) - vpaddd 0x40(%rsp),%ymm6,%ymm0 - vmovdqa %ymm0,0x40(%rsp) - vpxor %ymm0,%ymm14,%ymm14 - vpshufb %ymm2,%ymm14,%ymm14 - # x3 += x7, x15 = rotl32(x15 ^ x3, 8) - vpaddd 0x60(%rsp),%ymm7,%ymm0 - vmovdqa %ymm0,0x60(%rsp) - vpxor %ymm0,%ymm15,%ymm15 - vpshufb %ymm2,%ymm15,%ymm15 - - # x8 += x12, x4 = rotl32(x4 ^ x8, 7) - vpaddd %ymm12,%ymm8,%ymm8 - vpxor %ymm8,%ymm4,%ymm4 - vpslld $7,%ymm4,%ymm0 - vpsrld $25,%ymm4,%ymm4 - vpor %ymm0,%ymm4,%ymm4 - # x9 += x13, x5 = rotl32(x5 ^ x9, 7) - vpaddd %ymm13,%ymm9,%ymm9 - vpxor %ymm9,%ymm5,%ymm5 - vpslld $7,%ymm5,%ymm0 - vpsrld $25,%ymm5,%ymm5 - vpor %ymm0,%ymm5,%ymm5 - # x10 += x14, x6 = rotl32(x6 ^ x10, 7) - vpaddd %ymm14,%ymm10,%ymm10 - vpxor %ymm10,%ymm6,%ymm6 - vpslld $7,%ymm6,%ymm0 - vpsrld $25,%ymm6,%ymm6 - vpor %ymm0,%ymm6,%ymm6 - # x11 += x15, x7 = rotl32(x7 ^ x11, 7) - vpaddd %ymm15,%ymm11,%ymm11 - vpxor %ymm11,%ymm7,%ymm7 - vpslld $7,%ymm7,%ymm0 - vpsrld $25,%ymm7,%ymm7 - vpor %ymm0,%ymm7,%ymm7 - - # x0 += x5, x15 = rotl32(x15 ^ x0, 16) - vpaddd 0x00(%rsp),%ymm5,%ymm0 - vmovdqa %ymm0,0x00(%rsp) - vpxor %ymm0,%ymm15,%ymm15 - vpshufb %ymm3,%ymm15,%ymm15 - # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0 - vpaddd 0x20(%rsp),%ymm6,%ymm0 - vmovdqa %ymm0,0x20(%rsp) - vpxor %ymm0,%ymm12,%ymm12 - vpshufb %ymm3,%ymm12,%ymm12 - # x2 += x7, x13 = rotl32(x13 ^ x2, 16) - vpaddd 
0x40(%rsp),%ymm7,%ymm0 - vmovdqa %ymm0,0x40(%rsp) - vpxor %ymm0,%ymm13,%ymm13 - vpshufb %ymm3,%ymm13,%ymm13 - # x3 += x4, x14 = rotl32(x14 ^ x3, 16) - vpaddd 0x60(%rsp),%ymm4,%ymm0 - vmovdqa %ymm0,0x60(%rsp) - vpxor %ymm0,%ymm14,%ymm14 - vpshufb %ymm3,%ymm14,%ymm14 - - # x10 += x15, x5 = rotl32(x5 ^ x10, 12) - vpaddd %ymm15,%ymm10,%ymm10 - vpxor %ymm10,%ymm5,%ymm5 - vpslld $12,%ymm5,%ymm0 - vpsrld $20,%ymm5,%ymm5 - vpor %ymm0,%ymm5,%ymm5 - # x11 += x12, x6 = rotl32(x6 ^ x11, 12) - vpaddd %ymm12,%ymm11,%ymm11 - vpxor %ymm11,%ymm6,%ymm6 - vpslld $12,%ymm6,%ymm0 - vpsrld $20,%ymm6,%ymm6 - vpor %ymm0,%ymm6,%ymm6 - # x8 += x13, x7 = rotl32(x7 ^ x8, 12) - vpaddd %ymm13,%ymm8,%ymm8 - vpxor %ymm8,%ymm7,%ymm7 - vpslld $12,%ymm7,%ymm0 - vpsrld $20,%ymm7,%ymm7 - vpor %ymm0,%ymm7,%ymm7 - # x9 += x14, x4 = rotl32(x4 ^ x9, 12) - vpaddd %ymm14,%ymm9,%ymm9 - vpxor %ymm9,%ymm4,%ymm4 - vpslld $12,%ymm4,%ymm0 - vpsrld $20,%ymm4,%ymm4 - vpor %ymm0,%ymm4,%ymm4 - - # x0 += x5, x15 = rotl32(x15 ^ x0, 8) - vpaddd 0x00(%rsp),%ymm5,%ymm0 - vmovdqa %ymm0,0x00(%rsp) - vpxor %ymm0,%ymm15,%ymm15 - vpshufb %ymm2,%ymm15,%ymm15 - # x1 += x6, x12 = rotl32(x12 ^ x1, 8) - vpaddd 0x20(%rsp),%ymm6,%ymm0 - vmovdqa %ymm0,0x20(%rsp) - vpxor %ymm0,%ymm12,%ymm12 - vpshufb %ymm2,%ymm12,%ymm12 - # x2 += x7, x13 = rotl32(x13 ^ x2, 8) - vpaddd 0x40(%rsp),%ymm7,%ymm0 - vmovdqa %ymm0,0x40(%rsp) - vpxor %ymm0,%ymm13,%ymm13 - vpshufb %ymm2,%ymm13,%ymm13 - # x3 += x4, x14 = rotl32(x14 ^ x3, 8) - vpaddd 0x60(%rsp),%ymm4,%ymm0 - vmovdqa %ymm0,0x60(%rsp) - vpxor %ymm0,%ymm14,%ymm14 - vpshufb %ymm2,%ymm14,%ymm14 - - # x10 += x15, x5 = rotl32(x5 ^ x10, 7) - vpaddd %ymm15,%ymm10,%ymm10 - vpxor %ymm10,%ymm5,%ymm5 - vpslld $7,%ymm5,%ymm0 - vpsrld $25,%ymm5,%ymm5 - vpor %ymm0,%ymm5,%ymm5 - # x11 += x12, x6 = rotl32(x6 ^ x11, 7) - vpaddd %ymm12,%ymm11,%ymm11 - vpxor %ymm11,%ymm6,%ymm6 - vpslld $7,%ymm6,%ymm0 - vpsrld $25,%ymm6,%ymm6 - vpor %ymm0,%ymm6,%ymm6 - # x8 += x13, x7 = rotl32(x7 ^ x8, 7) - vpaddd %ymm13,%ymm8,%ymm8 - 
vpxor %ymm8,%ymm7,%ymm7 - vpslld $7,%ymm7,%ymm0 - vpsrld $25,%ymm7,%ymm7 - vpor %ymm0,%ymm7,%ymm7 - # x9 += x14, x4 = rotl32(x4 ^ x9, 7) - vpaddd %ymm14,%ymm9,%ymm9 - vpxor %ymm9,%ymm4,%ymm4 - vpslld $7,%ymm4,%ymm0 - vpsrld $25,%ymm4,%ymm4 - vpor %ymm0,%ymm4,%ymm4 - - dec %ecx - jnz .Ldoubleround8 - - # x0..15[0-3] += s[0..15] - vpbroadcastd 0x00(%rdi),%ymm0 - vpaddd 0x00(%rsp),%ymm0,%ymm0 - vmovdqa %ymm0,0x00(%rsp) - vpbroadcastd 0x04(%rdi),%ymm0 - vpaddd 0x20(%rsp),%ymm0,%ymm0 - vmovdqa %ymm0,0x20(%rsp) - vpbroadcastd 0x08(%rdi),%ymm0 - vpaddd 0x40(%rsp),%ymm0,%ymm0 - vmovdqa %ymm0,0x40(%rsp) - vpbroadcastd 0x0c(%rdi),%ymm0 - vpaddd 0x60(%rsp),%ymm0,%ymm0 - vmovdqa %ymm0,0x60(%rsp) - vpbroadcastd 0x10(%rdi),%ymm0 - vpaddd %ymm0,%ymm4,%ymm4 - vpbroadcastd 0x14(%rdi),%ymm0 - vpaddd %ymm0,%ymm5,%ymm5 - vpbroadcastd 0x18(%rdi),%ymm0 - vpaddd %ymm0,%ymm6,%ymm6 - vpbroadcastd 0x1c(%rdi),%ymm0 - vpaddd %ymm0,%ymm7,%ymm7 - vpbroadcastd 0x20(%rdi),%ymm0 - vpaddd %ymm0,%ymm8,%ymm8 - vpbroadcastd 0x24(%rdi),%ymm0 - vpaddd %ymm0,%ymm9,%ymm9 - vpbroadcastd 0x28(%rdi),%ymm0 - vpaddd %ymm0,%ymm10,%ymm10 - vpbroadcastd 0x2c(%rdi),%ymm0 - vpaddd %ymm0,%ymm11,%ymm11 - vpbroadcastd 0x30(%rdi),%ymm0 - vpaddd %ymm0,%ymm12,%ymm12 - vpbroadcastd 0x34(%rdi),%ymm0 - vpaddd %ymm0,%ymm13,%ymm13 - vpbroadcastd 0x38(%rdi),%ymm0 - vpaddd %ymm0,%ymm14,%ymm14 - vpbroadcastd 0x3c(%rdi),%ymm0 - vpaddd %ymm0,%ymm15,%ymm15 - - # x12 += counter values 0-3 - vpaddd %ymm1,%ymm12,%ymm12 - - # interleave 32-bit words in state n, n+1 - vmovdqa 0x00(%rsp),%ymm0 - vmovdqa 0x20(%rsp),%ymm1 - vpunpckldq %ymm1,%ymm0,%ymm2 - vpunpckhdq %ymm1,%ymm0,%ymm1 - vmovdqa %ymm2,0x00(%rsp) - vmovdqa %ymm1,0x20(%rsp) - vmovdqa 0x40(%rsp),%ymm0 - vmovdqa 0x60(%rsp),%ymm1 - vpunpckldq %ymm1,%ymm0,%ymm2 - vpunpckhdq %ymm1,%ymm0,%ymm1 - vmovdqa %ymm2,0x40(%rsp) - vmovdqa %ymm1,0x60(%rsp) - vmovdqa %ymm4,%ymm0 - vpunpckldq %ymm5,%ymm0,%ymm4 - vpunpckhdq %ymm5,%ymm0,%ymm5 - vmovdqa %ymm6,%ymm0 - vpunpckldq %ymm7,%ymm0,%ymm6 - 
vpunpckhdq %ymm7,%ymm0,%ymm7 - vmovdqa %ymm8,%ymm0 - vpunpckldq %ymm9,%ymm0,%ymm8 - vpunpckhdq %ymm9,%ymm0,%ymm9 - vmovdqa %ymm10,%ymm0 - vpunpckldq %ymm11,%ymm0,%ymm10 - vpunpckhdq %ymm11,%ymm0,%ymm11 - vmovdqa %ymm12,%ymm0 - vpunpckldq %ymm13,%ymm0,%ymm12 - vpunpckhdq %ymm13,%ymm0,%ymm13 - vmovdqa %ymm14,%ymm0 - vpunpckldq %ymm15,%ymm0,%ymm14 - vpunpckhdq %ymm15,%ymm0,%ymm15 - - # interleave 64-bit words in state n, n+2 - vmovdqa 0x00(%rsp),%ymm0 - vmovdqa 0x40(%rsp),%ymm2 - vpunpcklqdq %ymm2,%ymm0,%ymm1 - vpunpckhqdq %ymm2,%ymm0,%ymm2 - vmovdqa %ymm1,0x00(%rsp) - vmovdqa %ymm2,0x40(%rsp) - vmovdqa 0x20(%rsp),%ymm0 - vmovdqa 0x60(%rsp),%ymm2 - vpunpcklqdq %ymm2,%ymm0,%ymm1 - vpunpckhqdq %ymm2,%ymm0,%ymm2 - vmovdqa %ymm1,0x20(%rsp) - vmovdqa %ymm2,0x60(%rsp) - vmovdqa %ymm4,%ymm0 - vpunpcklqdq %ymm6,%ymm0,%ymm4 - vpunpckhqdq %ymm6,%ymm0,%ymm6 - vmovdqa %ymm5,%ymm0 - vpunpcklqdq %ymm7,%ymm0,%ymm5 - vpunpckhqdq %ymm7,%ymm0,%ymm7 - vmovdqa %ymm8,%ymm0 - vpunpcklqdq %ymm10,%ymm0,%ymm8 - vpunpckhqdq %ymm10,%ymm0,%ymm10 - vmovdqa %ymm9,%ymm0 - vpunpcklqdq %ymm11,%ymm0,%ymm9 - vpunpckhqdq %ymm11,%ymm0,%ymm11 - vmovdqa %ymm12,%ymm0 - vpunpcklqdq %ymm14,%ymm0,%ymm12 - vpunpckhqdq %ymm14,%ymm0,%ymm14 - vmovdqa %ymm13,%ymm0 - vpunpcklqdq %ymm15,%ymm0,%ymm13 - vpunpckhqdq %ymm15,%ymm0,%ymm15 - - # interleave 128-bit words in state n, n+4 - vmovdqa 0x00(%rsp),%ymm0 - vperm2i128 $0x20,%ymm4,%ymm0,%ymm1 - vperm2i128 $0x31,%ymm4,%ymm0,%ymm4 - vmovdqa %ymm1,0x00(%rsp) - vmovdqa 0x20(%rsp),%ymm0 - vperm2i128 $0x20,%ymm5,%ymm0,%ymm1 - vperm2i128 $0x31,%ymm5,%ymm0,%ymm5 - vmovdqa %ymm1,0x20(%rsp) - vmovdqa 0x40(%rsp),%ymm0 - vperm2i128 $0x20,%ymm6,%ymm0,%ymm1 - vperm2i128 $0x31,%ymm6,%ymm0,%ymm6 - vmovdqa %ymm1,0x40(%rsp) - vmovdqa 0x60(%rsp),%ymm0 - vperm2i128 $0x20,%ymm7,%ymm0,%ymm1 - vperm2i128 $0x31,%ymm7,%ymm0,%ymm7 - vmovdqa %ymm1,0x60(%rsp) - vperm2i128 $0x20,%ymm12,%ymm8,%ymm0 - vperm2i128 $0x31,%ymm12,%ymm8,%ymm12 - vmovdqa %ymm0,%ymm8 - vperm2i128 $0x20,%ymm13,%ymm9,%ymm0 - 
vperm2i128 $0x31,%ymm13,%ymm9,%ymm13 - vmovdqa %ymm0,%ymm9 - vperm2i128 $0x20,%ymm14,%ymm10,%ymm0 - vperm2i128 $0x31,%ymm14,%ymm10,%ymm14 - vmovdqa %ymm0,%ymm10 - vperm2i128 $0x20,%ymm15,%ymm11,%ymm0 - vperm2i128 $0x31,%ymm15,%ymm11,%ymm15 - vmovdqa %ymm0,%ymm11 - - # xor with corresponding input, write to output - vmovdqa 0x00(%rsp),%ymm0 - vpxor 0x0000(%rdx),%ymm0,%ymm0 - vmovdqu %ymm0,0x0000(%rsi) - vmovdqa 0x20(%rsp),%ymm0 - vpxor 0x0080(%rdx),%ymm0,%ymm0 - vmovdqu %ymm0,0x0080(%rsi) - vmovdqa 0x40(%rsp),%ymm0 - vpxor 0x0040(%rdx),%ymm0,%ymm0 - vmovdqu %ymm0,0x0040(%rsi) - vmovdqa 0x60(%rsp),%ymm0 - vpxor 0x00c0(%rdx),%ymm0,%ymm0 - vmovdqu %ymm0,0x00c0(%rsi) - vpxor 0x0100(%rdx),%ymm4,%ymm4 - vmovdqu %ymm4,0x0100(%rsi) - vpxor 0x0180(%rdx),%ymm5,%ymm5 - vmovdqu %ymm5,0x00180(%rsi) - vpxor 0x0140(%rdx),%ymm6,%ymm6 - vmovdqu %ymm6,0x0140(%rsi) - vpxor 0x01c0(%rdx),%ymm7,%ymm7 - vmovdqu %ymm7,0x01c0(%rsi) - vpxor 0x0020(%rdx),%ymm8,%ymm8 - vmovdqu %ymm8,0x0020(%rsi) - vpxor 0x00a0(%rdx),%ymm9,%ymm9 - vmovdqu %ymm9,0x00a0(%rsi) - vpxor 0x0060(%rdx),%ymm10,%ymm10 - vmovdqu %ymm10,0x0060(%rsi) - vpxor 0x00e0(%rdx),%ymm11,%ymm11 - vmovdqu %ymm11,0x00e0(%rsi) - vpxor 0x0120(%rdx),%ymm12,%ymm12 - vmovdqu %ymm12,0x0120(%rsi) - vpxor 0x01a0(%rdx),%ymm13,%ymm13 - vmovdqu %ymm13,0x01a0(%rsi) - vpxor 0x0160(%rdx),%ymm14,%ymm14 - vmovdqu %ymm14,0x0160(%rsi) - vpxor 0x01e0(%rdx),%ymm15,%ymm15 - vmovdqu %ymm15,0x01e0(%rsi) - - vzeroupper - lea -8(%r10),%rsp - ret -ENDPROC(chacha20_8block_xor_avx2) diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c deleted file mode 100644 index dce7c5d39c2f..000000000000 --- a/arch/x86/crypto/chacha20_glue.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software 
Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define CHACHA20_STATE_ALIGN 16 - -asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); -#ifdef CONFIG_AS_AVX2 -asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src); -static bool chacha20_use_avx2; -#endif - -static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 buf[CHACHA20_BLOCK_SIZE]; - -#ifdef CONFIG_AS_AVX2 - if (chacha20_use_avx2) { - while (bytes >= CHACHA20_BLOCK_SIZE * 8) { - chacha20_8block_xor_avx2(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 8; - src += CHACHA20_BLOCK_SIZE * 8; - dst += CHACHA20_BLOCK_SIZE * 8; - state[12] += 8; - } - } -#endif - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { - chacha20_4block_xor_ssse3(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block_xor_ssse3(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha20_block_xor_ssse3(state, buf, buf); - memcpy(dst, buf, bytes); - } -} - -static int chacha20_simd(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 *state, state_buf[16 + 2] __aligned(8); - struct skcipher_walk walk; - int err; - - BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); - state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); - - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(req); - - err = skcipher_walk_virt(&walk, req, true); - - crypto_chacha20_init(state, ctx, 
walk.iv); - - kernel_fpu_begin(); - - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = skcipher_walk_done(&walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); - } - - if (walk.nbytes) { - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes); - err = skcipher_walk_done(&walk, 0); - } - - kernel_fpu_end(); - - return err; -} - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-simd", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, -}; - -static int __init chacha20_simd_mod_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return -ENODEV; - -#ifdef CONFIG_AS_AVX2 - chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); -#endif - return crypto_register_skcipher(&alg); -} - -static void __exit chacha20_simd_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-simd"); diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c new file mode 100644 index 000000000000..45c1c4143176 --- /dev/null +++ b/arch/x86/crypto/chacha_glue.c @@ -0,0 +1,304 @@ +/* + * x64 SIMD accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2015 Martin Willi + 
* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define CHACHA_STATE_ALIGN 16 + +asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); +#ifdef CONFIG_AS_AVX2 +asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +static bool chacha_use_avx2; +#ifdef CONFIG_AS_AVX512 +asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); +static bool chacha_use_avx512vl; +#endif +#endif + +static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) +{ + len = min(len, maxblocks * CHACHA_BLOCK_SIZE); + return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; +} + +static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ +#ifdef CONFIG_AS_AVX2 +#ifdef CONFIG_AS_AVX512 + if (chacha_use_avx512vl) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += 
CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_2block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state[12] += chacha_advance(bytes, 2); + return; + } + } +#endif + if (chacha_use_avx2) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 4); + return; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 2); + return; + } + } +#endif + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + state[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); + state[12]++; + } +} + +static int chacha_simd_stream_xor(struct skcipher_walk *walk, + struct chacha_ctx *ctx, u8 *iv) +{ + u32 *state, state_buf[16 + 2] __aligned(8); + int next_yield = 4096; /* bytes until next FPU yield */ + int err = 0; + + BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); + state = 
PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); + + crypto_chacha_init(state, ctx, iv); + + while (walk->nbytes > 0) { + unsigned int nbytes = walk->nbytes; + + if (nbytes < walk->total) { + nbytes = round_down(nbytes, walk->stride); + next_yield -= nbytes; + } + + chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, + nbytes, ctx->nrounds); + + if (next_yield <= 0) { + /* temporarily allow preemption */ + kernel_fpu_end(); + kernel_fpu_begin(); + next_yield = 4096; + } + + err = skcipher_walk_done(walk, walk->nbytes - nbytes); + } + + return err; +} + +static int chacha_simd(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + return crypto_chacha_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; + + kernel_fpu_begin(); + err = chacha_simd_stream_xor(&walk, ctx, req->iv); + kernel_fpu_end(); + return err; +} + +static int xchacha_simd(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + struct chacha_ctx subctx; + u32 *state, state_buf[16 + 2] __aligned(8); + u8 real_iv[16]; + int err; + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + return crypto_xchacha_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; + + BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); + state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); + crypto_chacha_init(state, ctx, req->iv); + + kernel_fpu_begin(); + + hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + err = chacha_simd_stream_xor(&walk, &subctx, real_iv); + + kernel_fpu_end(); + + return err; +} + +static struct 
skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha_simd, + .decrypt = chacha_simd, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = xchacha_simd, + .decrypt = xchacha_simd, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = xchacha_simd, + .decrypt = xchacha_simd, + }, +}; + +static int __init chacha_simd_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_SSSE3)) + return -ENODEV; + +#ifdef CONFIG_AS_AVX2 + chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); +#ifdef CONFIG_AS_AVX512 + chacha_use_avx512vl = chacha_use_avx2 && + boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ +#endif +#endif + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + 
+module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Willi "); +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-simd"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-simd"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-simd"); diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S new file mode 100644 index 000000000000..f7946ea1b704 --- /dev/null +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, x86_64 AVX2 accelerated + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + +#define PASS0_SUMS %ymm0 +#define PASS1_SUMS %ymm1 +#define PASS2_SUMS %ymm2 +#define PASS3_SUMS %ymm3 +#define K0 %ymm4 +#define K0_XMM %xmm4 +#define K1 %ymm5 +#define K1_XMM %xmm5 +#define K2 %ymm6 +#define K2_XMM %xmm6 +#define K3 %ymm7 +#define K3_XMM %xmm7 +#define T0 %ymm8 +#define T1 %ymm9 +#define T2 %ymm10 +#define T2_XMM %xmm10 +#define T3 %ymm11 +#define T3_XMM %xmm11 +#define T4 %ymm12 +#define T5 %ymm13 +#define T6 %ymm14 +#define T7 %ymm15 +#define KEY %rdi +#define MESSAGE %rsi +#define MESSAGE_LEN %rdx +#define HASH %rcx + +.macro _nh_2xstride k0, k1, k2, k3 + + // Add message words to key words + vpaddd \k0, T3, T0 + vpaddd \k1, T3, T1 + vpaddd \k2, T3, T2 + vpaddd \k3, T3, T3 + + // Multiply 32x32 => 64 and accumulate + vpshufd $0x10, T0, T4 + vpshufd $0x32, T0, T0 + vpshufd $0x10, T1, T5 + vpshufd $0x32, T1, T1 + vpshufd $0x10, T2, T6 + vpshufd $0x32, T2, T2 + vpshufd $0x10, T3, T7 + vpshufd $0x32, T3, T3 + vpmuludq T4, T0, T0 + vpmuludq T5, T1, T1 + vpmuludq T6, T2, T2 + vpmuludq T7, T3, T3 + vpaddq T0, PASS0_SUMS, PASS0_SUMS + vpaddq T1, PASS1_SUMS, PASS1_SUMS + vpaddq T2, PASS2_SUMS, PASS2_SUMS + vpaddq T3, PASS3_SUMS, 
PASS3_SUMS +.endm + +/* + * void nh_avx2(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +ENTRY(nh_avx2) + + vmovdqu 0x00(KEY), K0 + vmovdqu 0x10(KEY), K1 + add $0x20, KEY + vpxor PASS0_SUMS, PASS0_SUMS, PASS0_SUMS + vpxor PASS1_SUMS, PASS1_SUMS, PASS1_SUMS + vpxor PASS2_SUMS, PASS2_SUMS, PASS2_SUMS + vpxor PASS3_SUMS, PASS3_SUMS, PASS3_SUMS + + sub $0x40, MESSAGE_LEN + jl .Lloop4_done +.Lloop4: + vmovdqu (MESSAGE), T3 + vmovdqu 0x00(KEY), K2 + vmovdqu 0x10(KEY), K3 + _nh_2xstride K0, K1, K2, K3 + + vmovdqu 0x20(MESSAGE), T3 + vmovdqu 0x20(KEY), K0 + vmovdqu 0x30(KEY), K1 + _nh_2xstride K2, K3, K0, K1 + + add $0x40, MESSAGE + add $0x40, KEY + sub $0x40, MESSAGE_LEN + jge .Lloop4 + +.Lloop4_done: + and $0x3f, MESSAGE_LEN + jz .Ldone + + cmp $0x20, MESSAGE_LEN + jl .Llast + + // 2 or 3 strides remain; do 2 more. + vmovdqu (MESSAGE), T3 + vmovdqu 0x00(KEY), K2 + vmovdqu 0x10(KEY), K3 + _nh_2xstride K0, K1, K2, K3 + add $0x20, MESSAGE + add $0x20, KEY + sub $0x20, MESSAGE_LEN + jz .Ldone + vmovdqa K2, K0 + vmovdqa K3, K1 +.Llast: + // Last stride. Zero the high 128 bits of the message and keys so they + // don't affect the result when processing them like 2 strides. 
+ vmovdqu (MESSAGE), T3_XMM + vmovdqa K0_XMM, K0_XMM + vmovdqa K1_XMM, K1_XMM + vmovdqu 0x00(KEY), K2_XMM + vmovdqu 0x10(KEY), K3_XMM + _nh_2xstride K0, K1, K2, K3 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + + // PASS0_SUMS is (0A 0B 0C 0D) + // PASS1_SUMS is (1A 1B 1C 1D) + // PASS2_SUMS is (2A 2B 2C 2D) + // PASS3_SUMS is (3A 3B 3C 3D) + // We need the horizontal sums: + // (0A + 0B + 0C + 0D, + // 1A + 1B + 1C + 1D, + // 2A + 2B + 2C + 2D, + // 3A + 3B + 3C + 3D) + // + + vpunpcklqdq PASS1_SUMS, PASS0_SUMS, T0 // T0 = (0A 1A 0C 1C) + vpunpckhqdq PASS1_SUMS, PASS0_SUMS, T1 // T1 = (0B 1B 0D 1D) + vpunpcklqdq PASS3_SUMS, PASS2_SUMS, T2 // T2 = (2A 3A 2C 3C) + vpunpckhqdq PASS3_SUMS, PASS2_SUMS, T3 // T3 = (2B 3B 2D 3D) + + vinserti128 $0x1, T2_XMM, T0, T4 // T4 = (0A 1A 2A 3A) + vinserti128 $0x1, T3_XMM, T1, T5 // T5 = (0B 1B 2B 3B) + vperm2i128 $0x31, T2, T0, T0 // T0 = (0C 1C 2C 3C) + vperm2i128 $0x31, T3, T1, T1 // T1 = (0D 1D 2D 3D) + + vpaddq T5, T4, T4 + vpaddq T1, T0, T0 + vpaddq T4, T0, T0 + vmovdqu T0, (HASH) + ret +ENDPROC(nh_avx2) diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S new file mode 100644 index 000000000000..51f52d4ab4bb --- /dev/null +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + +#define PASS0_SUMS %xmm0 +#define PASS1_SUMS %xmm1 +#define PASS2_SUMS %xmm2 +#define PASS3_SUMS %xmm3 +#define K0 %xmm4 +#define K1 %xmm5 +#define K2 %xmm6 +#define K3 %xmm7 +#define T0 %xmm8 +#define T1 %xmm9 +#define T2 %xmm10 +#define T3 %xmm11 +#define T4 %xmm12 +#define T5 %xmm13 +#define T6 %xmm14 +#define T7 %xmm15 +#define KEY %rdi +#define MESSAGE %rsi +#define MESSAGE_LEN %rdx +#define HASH %rcx + +.macro _nh_stride k0, k1, k2, k3, offset + + // Load next message stride + movdqu 
\offset(MESSAGE), T1 + + // Load next key stride + movdqu \offset(KEY), \k3 + + // Add message words to key words + movdqa T1, T2 + movdqa T1, T3 + paddd T1, \k0 // reuse k0 to avoid a move + paddd \k1, T1 + paddd \k2, T2 + paddd \k3, T3 + + // Multiply 32x32 => 64 and accumulate + pshufd $0x10, \k0, T4 + pshufd $0x32, \k0, \k0 + pshufd $0x10, T1, T5 + pshufd $0x32, T1, T1 + pshufd $0x10, T2, T6 + pshufd $0x32, T2, T2 + pshufd $0x10, T3, T7 + pshufd $0x32, T3, T3 + pmuludq T4, \k0 + pmuludq T5, T1 + pmuludq T6, T2 + pmuludq T7, T3 + paddq \k0, PASS0_SUMS + paddq T1, PASS1_SUMS + paddq T2, PASS2_SUMS + paddq T3, PASS3_SUMS +.endm + +/* + * void nh_sse2(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +ENTRY(nh_sse2) + + movdqu 0x00(KEY), K0 + movdqu 0x10(KEY), K1 + movdqu 0x20(KEY), K2 + add $0x30, KEY + pxor PASS0_SUMS, PASS0_SUMS + pxor PASS1_SUMS, PASS1_SUMS + pxor PASS2_SUMS, PASS2_SUMS + pxor PASS3_SUMS, PASS3_SUMS + + sub $0x40, MESSAGE_LEN + jl .Lloop4_done +.Lloop4: + _nh_stride K0, K1, K2, K3, 0x00 + _nh_stride K1, K2, K3, K0, 0x10 + _nh_stride K2, K3, K0, K1, 0x20 + _nh_stride K3, K0, K1, K2, 0x30 + add $0x40, KEY + add $0x40, MESSAGE + sub $0x40, MESSAGE_LEN + jge .Lloop4 + +.Lloop4_done: + and $0x3f, MESSAGE_LEN + jz .Ldone + _nh_stride K0, K1, K2, K3, 0x00 + + sub $0x10, MESSAGE_LEN + jz .Ldone + _nh_stride K1, K2, K3, K0, 0x10 + + sub $0x10, MESSAGE_LEN + jz .Ldone + _nh_stride K2, K3, K0, K1, 0x20 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + movdqa PASS0_SUMS, T0 + movdqa PASS2_SUMS, T1 + punpcklqdq PASS1_SUMS, T0 // => (PASS0_SUM_A PASS1_SUM_A) + punpcklqdq PASS3_SUMS, T1 // => (PASS2_SUM_A PASS3_SUM_A) + punpckhqdq PASS1_SUMS, PASS0_SUMS // => (PASS0_SUM_B PASS1_SUM_B) + punpckhqdq PASS3_SUMS, PASS2_SUMS // => (PASS2_SUM_B PASS3_SUM_B) + paddq PASS0_SUMS, T0 + paddq PASS2_SUMS, T1 + movdqu T0, 0x00(HASH) + movdqu T1, 
0x10(HASH) + ret +ENDPROC(nh_sse2) diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c new file mode 100644 index 000000000000..20d815ea4b6a --- /dev/null +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (AVX2 accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include + +asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_avx2(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_avx2_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !irq_fpu_usable()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + + kernel_fpu_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); + kernel_fpu_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-avx2", + .base.cra_priority = 300, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_avx2_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + 
crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (AVX2-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-avx2"); diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c new file mode 100644 index 000000000000..ed68d164ce14 --- /dev/null +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (SSE2 accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include + +asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_sse2(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_sse2_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !irq_fpu_usable()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + + kernel_fpu_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); + kernel_fpu_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-sse2", + .base.cra_priority = 200, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_sse2_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + 
.descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_XMM2)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (SSE2-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-sse2"); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index f012b7e28ad1..88cc01506c84 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { if (unlikely(!sctx->wset)) { if (!sctx->uset) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 5, dctx->r); + poly1305_simd_mult(sctx->u + 5, dctx->r.r); memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 10, dctx->r); + poly1305_simd_mult(sctx->u + 10, dctx->r.r); sctx->wset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; } #endif if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { if (unlikely(!sctx->uset)) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + 
poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h, src, dctx->r, 1); + poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); srclen -= POLY1305_BLOCK_SIZE; } return srclen; diff --git a/crypto/Kconfig b/crypto/Kconfig index 05c91eb10ca1..045af6eeb7e2 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -430,11 +430,14 @@ config CRYPTO_CTS help CTS: Cipher Text Stealing This is the Cipher Text Stealing mode as described by - Section 8 of rfc2040 and referenced by rfc3962. - (rfc3962 includes errata information in its Appendix A) + Section 8 of rfc2040 and referenced by rfc3962 + (rfc3962 includes errata information in its Appendix A) or + CBC-CS3 as defined by NIST in Sp800-38A addendum from Oct 2010. This mode is required for Kerberos gss mechanism support for AES encryption. + See: https://csrc.nist.gov/publications/detail/sp/800-38a/addendum/final + config CRYPTO_ECB tristate "ECB support" select CRYPTO_BLKCIPHER @@ -493,6 +496,50 @@ config CRYPTO_KEYWRAP Support for key wrapping (NIST SP800-38F / RFC3394) without padding. +config CRYPTO_NHPOLY1305 + tristate + select CRYPTO_HASH + select CRYPTO_POLY1305 + +config CRYPTO_NHPOLY1305_SSE2 + tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)" + depends on X86 && 64BIT + select CRYPTO_NHPOLY1305 + help + SSE2 optimized implementation of the hash function used by the + Adiantum encryption mode. + +config CRYPTO_NHPOLY1305_AVX2 + tristate "NHPoly1305 hash function (x86_64 AVX2 implementation)" + depends on X86 && 64BIT + select CRYPTO_NHPOLY1305 + help + AVX2 optimized implementation of the hash function used by the + Adiantum encryption mode. 
+ +config CRYPTO_ADIANTUM + tristate "Adiantum support" + select CRYPTO_CHACHA20 + select CRYPTO_POLY1305 + select CRYPTO_NHPOLY1305 + help + Adiantum is a tweakable, length-preserving encryption mode + designed for fast and secure disk encryption, especially on + CPUs without dedicated crypto instructions. It encrypts + each sector using the XChaCha12 stream cipher, two passes of + an ε-almost-∆-universal hash function, and an invocation of + the AES-256 block cipher on a single 16-byte block. On CPUs + without AES instructions, Adiantum is much faster than + AES-XTS. + + Adiantum's security is provably reducible to that of its + underlying stream and block ciphers, subject to a security + bound. Unlike XTS, Adiantum is a true wide-block encryption + mode, so it actually provides an even stronger notion of + security than XTS, subject to the security bound. + + If unsure, say N. + comment "Hash modes" config CRYPTO_CMAC @@ -936,6 +983,18 @@ config CRYPTO_SM3 http://www.oscca.gov.cn/UpFile/20101222141857786.pdf https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash +config CRYPTO_STREEBOG + tristate "Streebog Hash Function" + select CRYPTO_HASH + help + Streebog Hash Function (GOST R 34.11-2012, RFC 6986) is one of the Russian + cryptographic standard algorithms (called GOST algorithms). + This setting enables two hash algorithms with 256 and 512 bits output. + + References: + https://tc26.ru/upload/iblock/fed/feddbb4d26b685903faa2ba11aea43f6.pdf + https://tools.ietf.org/html/rfc6986 + config CRYPTO_TGR192 tristate "Tiger digest algorithms" select CRYPTO_HASH @@ -1006,7 +1065,8 @@ config CRYPTO_AES_TI 8 for decryption), this implementation only uses just two S-boxes of 256 bytes each, and attempts to eliminate data dependent latencies by prefetching the entire table into the cache at the start of each - block. + block. Interrupts are also disabled to avoid races where cachelines + are evicted when the CPU is interrupted to do something else. 
config CRYPTO_AES_586 tristate "AES cipher algorithms (i586)" @@ -1387,32 +1447,34 @@ config CRYPTO_SALSA20 Bernstein . See config CRYPTO_CHACHA20 - tristate "ChaCha20 cipher algorithm" + tristate "ChaCha stream cipher algorithms" select CRYPTO_BLKCIPHER help - ChaCha20 cipher algorithm, RFC7539. + The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J. Bernstein and further specified in RFC7539 for use in IETF protocols. - This is the portable C implementation of ChaCha20. - - See also: + This is the portable C implementation of ChaCha20. See also: + XChaCha20 is the application of the XSalsa20 construction to ChaCha20 + rather than to Salsa20. XChaCha20 extends ChaCha20's nonce length + from 64 bits (or 96 bits using the RFC7539 convention) to 192 bits, + while provably retaining ChaCha20's security. See also: + + + XChaCha12 is XChaCha20 reduced to 12 rounds, with correspondingly + reduced security margin but increased performance. It can be needed + in some performance-sensitive scenarios. + config CRYPTO_CHACHA20_X86_64 - tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)" + tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)" depends on X86 && 64BIT select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 help - ChaCha20 cipher algorithm, RFC7539. - - ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J. - Bernstein and further specified in RFC7539 for use in IETF protocols. - This is the x86_64 assembler implementation using SIMD instructions. - - See also: - + SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, + XChaCha20, and XChaCha12 stream ciphers. config CRYPTO_SEED tristate "SEED cipher algorithm" @@ -1812,7 +1874,8 @@ config CRYPTO_USER_API_AEAD cipher algorithms. config CRYPTO_STATS - bool + bool "Crypto usage statistics for User-space" + depends on CRYPTO_USER help This option enables the gathering of crypto stats. 
This will collect: diff --git a/crypto/Makefile b/crypto/Makefile index 5c207c76abf7..799ed5e94606 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -54,7 +54,8 @@ cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o obj-$(CONFIG_CRYPTO_USER) += crypto_user.o -crypto_user-y := crypto_user_base.o crypto_user_stat.o +crypto_user-y := crypto_user_base.o +crypto_user-$(CONFIG_CRYPTO_STATS) += crypto_user_stat.o obj-$(CONFIG_CRYPTO_CMAC) += cmac.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_VMAC) += vmac.o @@ -71,6 +72,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o +obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o @@ -84,6 +86,8 @@ obj-$(CONFIG_CRYPTO_LRW) += lrw.o obj-$(CONFIG_CRYPTO_XTS) += xts.o obj-$(CONFIG_CRYPTO_CTR) += ctr.o obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o +obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o +obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o obj-$(CONFIG_CRYPTO_GCM) += gcm.o obj-$(CONFIG_CRYPTO_CCM) += ccm.o obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o @@ -116,7 +120,7 @@ obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o -obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o +obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 8882e90e868e..b339587073c3 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -365,23 +365,18 @@ static int 
crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_blkcipher rblkcipher; - strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type)); - strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", - sizeof(rblkcipher.geniv)); - rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; + memset(&rblkcipher, 0, sizeof(rblkcipher)); + + strscpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type)); + strscpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; rblkcipher.max_keysize = alg->cra_ablkcipher.max_keysize; rblkcipher.ivsize = alg->cra_ablkcipher.ivsize; - if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, - sizeof(struct crypto_report_blkcipher), &rblkcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, + sizeof(rblkcipher), &rblkcipher); } #else static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) @@ -403,7 +398,7 @@ static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "min keysize : %u\n", ablkcipher->min_keysize); seq_printf(m, "max keysize : %u\n", ablkcipher->max_keysize); seq_printf(m, "ivsize : %u\n", ablkcipher->ivsize); - seq_printf(m, "geniv : %s\n", ablkcipher->geniv ?: ""); + seq_printf(m, "geniv : \n"); } const struct crypto_type crypto_ablkcipher_type = { @@ -415,78 +410,3 @@ const struct crypto_type crypto_ablkcipher_type = { .report = crypto_ablkcipher_report, }; EXPORT_SYMBOL_GPL(crypto_ablkcipher_type); - -static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type, - u32 mask) -{ - struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher; - struct ablkcipher_tfm *crt = &tfm->crt_ablkcipher; - - if (alg->ivsize > PAGE_SIZE / 8) - return -EINVAL; - - crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ? 
- alg->setkey : setkey; - crt->encrypt = alg->encrypt; - crt->decrypt = alg->decrypt; - crt->base = __crypto_ablkcipher_cast(tfm); - crt->ivsize = alg->ivsize; - - return 0; -} - -#ifdef CONFIG_NET -static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_report_blkcipher rblkcipher; - - strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type)); - strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", - sizeof(rblkcipher.geniv)); - rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; - - rblkcipher.blocksize = alg->cra_blocksize; - rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; - rblkcipher.max_keysize = alg->cra_ablkcipher.max_keysize; - rblkcipher.ivsize = alg->cra_ablkcipher.ivsize; - - if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, - sizeof(struct crypto_report_blkcipher), &rblkcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; -} -#else -static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) -{ - return -ENOSYS; -} -#endif - -static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) - __maybe_unused; -static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) -{ - struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; - - seq_printf(m, "type : givcipher\n"); - seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? 
- "yes" : "no"); - seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "min keysize : %u\n", ablkcipher->min_keysize); - seq_printf(m, "max keysize : %u\n", ablkcipher->max_keysize); - seq_printf(m, "ivsize : %u\n", ablkcipher->ivsize); - seq_printf(m, "geniv : %s\n", ablkcipher->geniv ?: ""); -} - -const struct crypto_type crypto_givcipher_type = { - .ctxsize = crypto_ablkcipher_ctxsize, - .init = crypto_init_givcipher_ops, -#ifdef CONFIG_PROC_FS - .show = crypto_givcipher_show, -#endif - .report = crypto_givcipher_report, -}; -EXPORT_SYMBOL_GPL(crypto_givcipher_type); diff --git a/crypto/acompress.c b/crypto/acompress.c index 1544b7c057fb..0c5bedd06e70 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -33,15 +33,11 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; - strncpy(racomp.type, "acomp", sizeof(racomp.type)); + memset(&racomp, 0, sizeof(racomp)); - if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, - sizeof(struct crypto_report_acomp), &racomp)) - goto nla_put_failure; - return 0; + strscpy(racomp.type, "acomp", sizeof(racomp.type)); -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(racomp), &racomp); } #else static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/adiantum.c b/crypto/adiantum.c new file mode 100644 index 000000000000..6651e713c45d --- /dev/null +++ b/crypto/adiantum.c @@ -0,0 +1,664 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Adiantum length-preserving encryption mode + * + * Copyright 2018 Google LLC + */ + +/* + * Adiantum is a tweakable, length-preserving encryption mode designed for fast + * and secure disk encryption, especially on CPUs without dedicated crypto + * instructions. 
Adiantum encrypts each sector using the XChaCha12 stream + * cipher, two passes of an ε-almost-∆-universal (ε-∆U) hash function based on + * NH and Poly1305, and an invocation of the AES-256 block cipher on a single + * 16-byte block. See the paper for details: + * + * Adiantum: length-preserving encryption for entry-level processors + * (https://eprint.iacr.org/2018/720.pdf) + * + * For flexibility, this implementation also allows other ciphers: + * + * - Stream cipher: XChaCha12 or XChaCha20 + * - Block cipher: any with a 128-bit block size and 256-bit key + * + * This implementation doesn't currently allow other ε-∆U hash functions, i.e. + * HPolyC is not supported. This is because Adiantum is ~20% faster than HPolyC + * but still provably as secure, and also the ε-∆U hash function of HBSH is + * formally defined to take two inputs (tweak, message) which makes it difficult + * to wrap with the crypto_shash API. Rather, some details need to be handled + * here. Nevertheless, if needed in the future, support for other ε-∆U hash + * functions could be added here. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * Size of right-hand part of input data, in bytes; also the size of the block + * cipher's block size and the hash function's output. + */ +#define BLOCKCIPHER_BLOCK_SIZE 16 + +/* Size of the block cipher key (K_E) in bytes */ +#define BLOCKCIPHER_KEY_SIZE 32 + +/* Size of the hash key (K_H) in bytes */ +#define HASH_KEY_SIZE (POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE) + +/* + * The specification allows variable-length tweaks, but Linux's crypto API + * currently only allows algorithms to support a single length. The "natural" + * tweak length for Adiantum is 16, since that fits into one Poly1305 block for + * the best performance. But longer tweaks are useful for fscrypt, to avoid + * needing to derive per-file keys. So instead we use two blocks, or 32 bytes. 
+ */ +#define TWEAK_SIZE 32 + +struct adiantum_instance_ctx { + struct crypto_skcipher_spawn streamcipher_spawn; + struct crypto_spawn blockcipher_spawn; + struct crypto_shash_spawn hash_spawn; +}; + +struct adiantum_tfm_ctx { + struct crypto_skcipher *streamcipher; + struct crypto_cipher *blockcipher; + struct crypto_shash *hash; + struct poly1305_key header_hash_key; +}; + +struct adiantum_request_ctx { + + /* + * Buffer for right-hand part of data, i.e. + * + * P_L => P_M => C_M => C_R when encrypting, or + * C_R => C_M => P_M => P_L when decrypting. + * + * Also used to build the IV for the stream cipher. + */ + union { + u8 bytes[XCHACHA_IV_SIZE]; + __le32 words[XCHACHA_IV_SIZE / sizeof(__le32)]; + le128 bignum; /* interpret as element of Z/(2^{128}Z) */ + } rbuf; + + bool enc; /* true if encrypting, false if decrypting */ + + /* + * The result of the Poly1305 ε-∆U hash function applied to + * (bulk length, tweak) + */ + le128 header_hash; + + /* Sub-requests, must be last */ + union { + struct shash_desc hash_desc; + struct skcipher_request streamcipher_req; + } u; +}; + +/* + * Given the XChaCha stream key K_S, derive the block cipher key K_E and the + * hash key K_H as follows: + * + * K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191) + * + * Note that this denotes using bits from the XChaCha keystream, which here we + * get indirectly by encrypting a buffer containing all 0's. 
+ */ +static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct { + u8 iv[XCHACHA_IV_SIZE]; + u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE]; + struct scatterlist sg; + struct crypto_wait wait; + struct skcipher_request req; /* must be last */ + } *data; + u8 *keyp; + int err; + + /* Set the stream cipher key (K_S) */ + crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(tctx->streamcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(tctx->streamcipher) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + /* Derive the subkeys */ + data = kzalloc(sizeof(*data) + + crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->iv[0] = 1; + sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys)); + crypto_init_wait(&data->wait); + skcipher_request_set_tfm(&data->req, tctx->streamcipher); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &data->wait); + skcipher_request_set_crypt(&data->req, &data->sg, &data->sg, + sizeof(data->derived_keys), data->iv); + err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait); + if (err) + goto out; + keyp = data->derived_keys; + + /* Set the block cipher key (K_E) */ + crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(tctx->blockcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_cipher_setkey(tctx->blockcipher, keyp, + BLOCKCIPHER_KEY_SIZE); + crypto_skcipher_set_flags(tfm, + crypto_cipher_get_flags(tctx->blockcipher) & + CRYPTO_TFM_RES_MASK); + if (err) + goto out; + keyp += BLOCKCIPHER_KEY_SIZE; + + /* Set the hash key (K_H) */ 
+ poly1305_core_setkey(&tctx->header_hash_key, keyp); + keyp += POLY1305_BLOCK_SIZE; + + crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK); + crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE); + crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) & + CRYPTO_TFM_RES_MASK); + keyp += NHPOLY1305_KEY_SIZE; + WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]); +out: + kzfree(data); + return err; +} + +/* Addition in Z/(2^{128}Z) */ +static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2) +{ + u64 x = le64_to_cpu(v1->b); + u64 y = le64_to_cpu(v2->b); + + r->b = cpu_to_le64(x + y); + r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) + + (x + y < x)); +} + +/* Subtraction in Z/(2^{128}Z) */ +static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2) +{ + u64 x = le64_to_cpu(v1->b); + u64 y = le64_to_cpu(v2->b); + + r->b = cpu_to_le64(x - y); + r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) - + (x - y > x)); +} + +/* + * Apply the Poly1305 ε-∆U hash function to (bulk length, tweak) and save the + * result to rctx->header_hash. This is the calculation + * + * H_T ← Poly1305_{K_T}(bin_{128}(|L|) || T) + * + * from the procedure in section 6.4 of the Adiantum paper. The resulting value + * is reused in both the first and second hash steps. Specifically, it's added + * to the result of an independently keyed ε-∆U hash function (for equal length + * inputs only) taken over the left-hand part (the "bulk") of the message, to + * give the overall Adiantum hash of the (tweak, left-hand part) pair. 
+ */ +static void adiantum_hash_header(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + struct { + __le64 message_bits; + __le64 padding; + } header = { + .message_bits = cpu_to_le64((u64)bulk_len * 8) + }; + struct poly1305_state state; + + poly1305_core_init(&state); + + BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); + poly1305_core_blocks(&state, &tctx->header_hash_key, + &header, sizeof(header) / POLY1305_BLOCK_SIZE); + + BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); + poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, + TWEAK_SIZE / POLY1305_BLOCK_SIZE); + + poly1305_core_emit(&state, &rctx->header_hash); +} + +/* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */ +static int adiantum_hash_message(struct skcipher_request *req, + struct scatterlist *sgl, le128 *digest) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + struct shash_desc *hash_desc = &rctx->u.hash_desc; + struct sg_mapping_iter miter; + unsigned int i, n; + int err; + + hash_desc->tfm = tctx->hash; + hash_desc->flags = 0; + + err = crypto_shash_init(hash_desc); + if (err) + return err; + + sg_miter_start(&miter, sgl, sg_nents(sgl), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + for (i = 0; i < bulk_len; i += n) { + sg_miter_next(&miter); + n = min_t(unsigned int, miter.length, bulk_len - i); + err = crypto_shash_update(hash_desc, miter.addr, n); + if (err) + break; + } + sg_miter_stop(&miter); + if (err) + return err; + + return crypto_shash_final(hash_desc, (u8 *)digest); +} + +/* Continue 
Adiantum encryption/decryption after the stream cipher step */ +static int adiantum_finish(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + le128 digest; + int err; + + /* If decrypting, decrypt C_M with the block cipher to get P_M */ + if (!rctx->enc) + crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes, + rctx->rbuf.bytes); + + /* + * Second hash step + * enc: C_R = C_M - H_{K_H}(T, C_L) + * dec: P_R = P_M - H_{K_H}(T, P_L) + */ + err = adiantum_hash_message(req, req->dst, &digest); + if (err) + return err; + le128_add(&digest, &digest, &rctx->header_hash); + le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); + scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->dst, + bulk_len, BLOCKCIPHER_BLOCK_SIZE, 1); + return 0; +} + +static void adiantum_streamcipher_done(struct crypto_async_request *areq, + int err) +{ + struct skcipher_request *req = areq->data; + + if (!err) + err = adiantum_finish(req); + + skcipher_request_complete(req, err); +} + +static int adiantum_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + unsigned int stream_len; + le128 digest; + int err; + + if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE) + return -EINVAL; + + rctx->enc = enc; + + /* + * First hash step + * enc: P_M = P_R + H_{K_H}(T, P_L) + * dec: C_M = C_R + H_{K_H}(T, C_L) + */ + adiantum_hash_header(req); + err = adiantum_hash_message(req, req->src, &digest); + if (err) + return err; + le128_add(&digest, &digest, &rctx->header_hash); + 
scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->src, + bulk_len, BLOCKCIPHER_BLOCK_SIZE, 0); + le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); + + /* If encrypting, encrypt P_M with the block cipher to get C_M */ + if (enc) + crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes, + rctx->rbuf.bytes); + + /* Initialize the rest of the XChaCha IV (first part is C_M) */ + BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16); + BUILD_BUG_ON(XCHACHA_IV_SIZE != 32); /* nonce || stream position */ + rctx->rbuf.words[4] = cpu_to_le32(1); + rctx->rbuf.words[5] = 0; + rctx->rbuf.words[6] = 0; + rctx->rbuf.words[7] = 0; + + /* + * XChaCha needs to be done on all the data except the last 16 bytes; + * for disk encryption that usually means 4080 or 496 bytes. But ChaCha + * implementations tend to be most efficient when passed a whole number + * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes. + * And here it doesn't matter whether the last 16 bytes are written to, + * as the second hash step will overwrite them. Thus, round the XChaCha + * length up to the next 64-byte boundary if possible. 
+ */ + stream_len = bulk_len; + if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen) + stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE); + + skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher); + skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src, + req->dst, stream_len, &rctx->rbuf); + skcipher_request_set_callback(&rctx->u.streamcipher_req, + req->base.flags, + adiantum_streamcipher_done, req); + return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?: + adiantum_finish(req); +} + +static int adiantum_encrypt(struct skcipher_request *req) +{ + return adiantum_crypt(req, true); +} + +static int adiantum_decrypt(struct skcipher_request *req) +{ + return adiantum_crypt(req, false); +} + +static int adiantum_init_tfm(struct crypto_skcipher *tfm) +{ + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *streamcipher; + struct crypto_cipher *blockcipher; + struct crypto_shash *hash; + unsigned int subreq_size; + int err; + + streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn); + if (IS_ERR(streamcipher)) + return PTR_ERR(streamcipher); + + blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn); + if (IS_ERR(blockcipher)) { + err = PTR_ERR(blockcipher); + goto err_free_streamcipher; + } + + hash = crypto_spawn_shash(&ictx->hash_spawn); + if (IS_ERR(hash)) { + err = PTR_ERR(hash); + goto err_free_blockcipher; + } + + tctx->streamcipher = streamcipher; + tctx->blockcipher = blockcipher; + tctx->hash = hash; + + BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) != + sizeof(struct adiantum_request_ctx)); + subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx, + u.hash_desc) + + crypto_shash_descsize(hash), + FIELD_SIZEOF(struct adiantum_request_ctx, + u.streamcipher_req) + + crypto_skcipher_reqsize(streamcipher)); + + 
crypto_skcipher_set_reqsize(tfm, + offsetof(struct adiantum_request_ctx, u) + + subreq_size); + return 0; + +err_free_blockcipher: + crypto_free_cipher(blockcipher); +err_free_streamcipher: + crypto_free_skcipher(streamcipher); + return err; +} + +static void adiantum_exit_tfm(struct crypto_skcipher *tfm) +{ + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(tctx->streamcipher); + crypto_free_cipher(tctx->blockcipher); + crypto_free_shash(tctx->hash); +} + +static void adiantum_free_instance(struct skcipher_instance *inst) +{ + struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(&ictx->streamcipher_spawn); + crypto_drop_spawn(&ictx->blockcipher_spawn); + crypto_drop_shash(&ictx->hash_spawn); + kfree(inst); +} + +/* + * Check for a supported set of inner algorithms. + * See the comment at the beginning of this file. + */ +static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg, + struct crypto_alg *blockcipher_alg, + struct shash_alg *hash_alg) +{ + if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 && + strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0) + return false; + + if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE || + blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE) + return false; + if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE) + return false; + + if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0) + return false; + + return true; +} + +static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_attr_type *algt; + const char *streamcipher_name; + const char *blockcipher_name; + const char *nhpoly1305_name; + struct skcipher_instance *inst; + struct adiantum_instance_ctx *ictx; + struct skcipher_alg *streamcipher_alg; + struct crypto_alg *blockcipher_alg; + struct crypto_alg *_hash_alg; + struct shash_alg *hash_alg; + int err; + + algt = 
crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + streamcipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(streamcipher_name)) + return PTR_ERR(streamcipher_name); + + blockcipher_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(blockcipher_name)) + return PTR_ERR(blockcipher_name); + + nhpoly1305_name = crypto_attr_alg_name(tb[3]); + if (nhpoly1305_name == ERR_PTR(-ENOENT)) + nhpoly1305_name = "nhpoly1305"; + if (IS_ERR(nhpoly1305_name)) + return PTR_ERR(nhpoly1305_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + ictx = skcipher_instance_ctx(inst); + + /* Stream cipher, e.g. "xchacha12" */ + err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name, + 0, crypto_requires_sync(algt->type, + algt->mask)); + if (err) + goto out_free_inst; + streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn); + + /* Block cipher, e.g. 
"aes" */ + err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name, + CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); + if (err) + goto out_drop_streamcipher; + blockcipher_alg = ictx->blockcipher_spawn.alg; + + /* NHPoly1305 ε-∆U hash function */ + _hash_alg = crypto_alg_mod_lookup(nhpoly1305_name, + CRYPTO_ALG_TYPE_SHASH, + CRYPTO_ALG_TYPE_MASK); + if (IS_ERR(_hash_alg)) { + err = PTR_ERR(_hash_alg); + goto out_drop_blockcipher; + } + hash_alg = __crypto_shash_alg(_hash_alg); + err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg, + skcipher_crypto_instance(inst)); + if (err) + goto out_put_hash; + + /* Check the set of algorithms */ + if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg, + hash_alg)) { + pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n", + streamcipher_alg->base.cra_name, + blockcipher_alg->cra_name, hash_alg->base.cra_name); + err = -EINVAL; + goto out_drop_hash; + } + + /* Instance fields */ + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "adiantum(%s,%s)", streamcipher_alg->base.cra_name, + blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + goto out_drop_hash; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "adiantum(%s,%s,%s)", + streamcipher_alg->base.cra_driver_name, + blockcipher_alg->cra_driver_name, + hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto out_drop_hash; + + inst->alg.base.cra_flags = streamcipher_alg->base.cra_flags & + CRYPTO_ALG_ASYNC; + inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE; + inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx); + inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask | + hash_alg->base.cra_alignmask; + /* + * The block cipher is only invoked once per message, so for long + * messages (e.g. sectors for disk encryption) its performance doesn't + * matter as much as that of the stream cipher and hash function. 
Thus, + * weigh the block cipher's ->cra_priority less. + */ + inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority + + 2 * hash_alg->base.cra_priority + + blockcipher_alg->cra_priority) / 7; + + inst->alg.setkey = adiantum_setkey; + inst->alg.encrypt = adiantum_encrypt; + inst->alg.decrypt = adiantum_decrypt; + inst->alg.init = adiantum_init_tfm; + inst->alg.exit = adiantum_exit_tfm; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(streamcipher_alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(streamcipher_alg); + inst->alg.ivsize = TWEAK_SIZE; + + inst->free = adiantum_free_instance; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto out_drop_hash; + + crypto_mod_put(_hash_alg); + return 0; + +out_drop_hash: + crypto_drop_shash(&ictx->hash_spawn); +out_put_hash: + crypto_mod_put(_hash_alg); +out_drop_blockcipher: + crypto_drop_spawn(&ictx->blockcipher_spawn); +out_drop_streamcipher: + crypto_drop_skcipher(&ictx->streamcipher_spawn); +out_free_inst: + kfree(inst); + return err; +} + +/* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */ +static struct crypto_template adiantum_tmpl = { + .name = "adiantum", + .create = adiantum_create, + .module = THIS_MODULE, +}; + +static int __init adiantum_module_init(void) +{ + return crypto_register_template(&adiantum_tmpl); +} + +static void __exit adiantum_module_exit(void) +{ + crypto_unregister_template(&adiantum_tmpl); +} + +module_init(adiantum_module_init); +module_exit(adiantum_module_exit); + +MODULE_DESCRIPTION("Adiantum length-preserving encryption mode"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("adiantum"); diff --git a/crypto/aead.c b/crypto/aead.c index 60b3bbe973e7..189c52d1f63a 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -119,20 +119,16 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_report_aead raead; struct aead_alg *aead = container_of(alg, 
struct aead_alg, base); - strncpy(raead.type, "aead", sizeof(raead.type)); - strncpy(raead.geniv, "", sizeof(raead.geniv)); + memset(&raead, 0, sizeof(raead)); + + strscpy(raead.type, "aead", sizeof(raead.type)); + strscpy(raead.geniv, "", sizeof(raead.geniv)); raead.blocksize = alg->cra_blocksize; raead.maxauthsize = aead->maxauthsize; raead.ivsize = aead->ivsize; - if (nla_put(skb, CRYPTOCFGA_REPORT_AEAD, - sizeof(struct crypto_report_aead), &raead)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_AEAD, sizeof(raead), &raead); } #else static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index ca554d57d01e..13df33aca463 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -63,7 +63,8 @@ static inline u8 byte(const u32 x, const unsigned n) static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 }; -__visible const u32 crypto_ft_tab[4][256] = { +/* cacheline-aligned to facilitate prefetching into cache */ +__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, @@ -327,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] = { } }; -__visible const u32 crypto_fl_tab[4][256] = { +__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, @@ -591,7 +592,7 @@ __visible const u32 crypto_fl_tab[4][256] = { } }; -__visible const u32 crypto_it_tab[4][256] = { +__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, @@ -855,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] = { } }; -__visible const u32 crypto_il_tab[4][256] = { +__visible const u32 crypto_il_tab[4][256] 
__cacheline_aligned = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038, diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c index 03023b2290e8..1ff9785b30f5 100644 --- a/crypto/aes_ti.c +++ b/crypto/aes_ti.c @@ -269,6 +269,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) const u32 *rkp = ctx->key_enc + 4; int rounds = 6 + ctx->key_length / 4; u32 st0[4], st1[4]; + unsigned long flags; int round; st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in); @@ -276,6 +277,12 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8); st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12); + /* + * Temporarily disable interrupts to avoid races where cachelines are + * evicted when the CPU is interrupted to do something else. + */ + local_irq_save(flags); + st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128]; st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160]; st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192]; @@ -300,6 +307,8 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4); put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8); put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12); + + local_irq_restore(flags); } static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) @@ -308,6 +317,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) const u32 *rkp = ctx->key_dec + 4; int rounds = 6 + ctx->key_length / 4; u32 st0[4], st1[4]; + unsigned long flags; int round; st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in); @@ -315,6 +325,12 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8); st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12); + /* + * Temporarily disable interrupts to avoid races where cachelines are + * evicted when the CPU is 
interrupted to do something else. + */ + local_irq_save(flags); + st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128]; st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160]; st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192]; @@ -339,6 +355,8 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4); put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8); put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12); + + local_irq_restore(flags); } static struct crypto_alg aes_alg = { diff --git a/crypto/ahash.c b/crypto/ahash.c index e21667b4e10a..5d320a811f75 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -364,20 +364,28 @@ static int crypto_ahash_op(struct ahash_request *req, int crypto_ahash_final(struct ahash_request *req) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final); - crypto_stat_ahash_final(req, ret); + crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_final); int crypto_ahash_finup(struct ahash_request *req) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup); - crypto_stat_ahash_final(req, ret); + crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_finup); @@ -385,13 +393,16 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = 
crypto_ahash_op(req, tfm->digest); - crypto_stat_ahash_final(req, ret); + crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_digest); @@ -498,18 +509,14 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_hash rhash; - strncpy(rhash.type, "ahash", sizeof(rhash.type)); + memset(&rhash, 0, sizeof(rhash)); + + strscpy(rhash.type, "ahash", sizeof(rhash.type)); rhash.blocksize = alg->cra_blocksize; rhash.digestsize = __crypto_hash_alg_common(alg)->digestsize; - if (nla_put(skb, CRYPTOCFGA_REPORT_HASH, - sizeof(struct crypto_report_hash), &rhash)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash); } #else static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/akcipher.c b/crypto/akcipher.c index cfbdb06d8ca8..0cbeae137e0a 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -30,15 +30,12 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; - strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + memset(&rakcipher, 0, sizeof(rakcipher)); - if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, - sizeof(struct crypto_report_akcipher), &rakcipher)) - goto nla_put_failure; - return 0; + strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, + sizeof(rakcipher), &rakcipher); } #else static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/algapi.c b/crypto/algapi.c index 2545c5f89c4c..8b65ada33e5d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -258,13 +258,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&alg->cra_list, &crypto_alg_list); list_add(&larval->alg.cra_list, &crypto_alg_list); - 
atomic_set(&alg->encrypt_cnt, 0); - atomic_set(&alg->decrypt_cnt, 0); - atomic64_set(&alg->encrypt_tlen, 0); - atomic64_set(&alg->decrypt_tlen, 0); - atomic_set(&alg->verify_cnt, 0); - atomic_set(&alg->cipher_err_cnt, 0); - atomic_set(&alg->sign_cnt, 0); + crypto_stats_init(alg); out: return larval; @@ -1076,6 +1070,245 @@ int crypto_type_has_alg(const char *name, const struct crypto_type *frontend, } EXPORT_SYMBOL_GPL(crypto_type_has_alg); +#ifdef CONFIG_CRYPTO_STATS +void crypto_stats_init(struct crypto_alg *alg) +{ + memset(&alg->stats, 0, sizeof(alg->stats)); +} +EXPORT_SYMBOL_GPL(crypto_stats_init); + +void crypto_stats_get(struct crypto_alg *alg) +{ + crypto_alg_get(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_get); + +void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.cipher.err_cnt); + } else { + atomic64_inc(&alg->stats.cipher.encrypt_cnt); + atomic64_add(nbytes, &alg->stats.cipher.encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_encrypt); + +void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.cipher.err_cnt); + } else { + atomic64_inc(&alg->stats.cipher.decrypt_cnt); + atomic64_add(nbytes, &alg->stats.cipher.decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_decrypt); + +void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, + int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.aead.err_cnt); + } else { + atomic64_inc(&alg->stats.aead.encrypt_cnt); + atomic64_add(cryptlen, &alg->stats.aead.encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_aead_encrypt); + +void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, + int ret) +{ + if 
(ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.aead.err_cnt); + } else { + atomic64_inc(&alg->stats.aead.decrypt_cnt); + atomic64_add(cryptlen, &alg->stats.aead.decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_aead_decrypt); + +void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.akcipher.err_cnt); + } else { + atomic64_inc(&alg->stats.akcipher.encrypt_cnt); + atomic64_add(src_len, &alg->stats.akcipher.encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_encrypt); + +void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.akcipher.err_cnt); + } else { + atomic64_inc(&alg->stats.akcipher.decrypt_cnt); + atomic64_add(src_len, &alg->stats.akcipher.decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_decrypt); + +void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->stats.akcipher.err_cnt); + else + atomic64_inc(&alg->stats.akcipher.sign_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_sign); + +void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->stats.akcipher.err_cnt); + else + atomic64_inc(&alg->stats.akcipher.verify_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_verify); + +void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.compress.err_cnt); + } else { + atomic64_inc(&alg->stats.compress.compress_cnt); + atomic64_add(slen, &alg->stats.compress.compress_tlen); + } + 
crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_compress); + +void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.compress.err_cnt); + } else { + atomic64_inc(&alg->stats.compress.decompress_cnt); + atomic64_add(slen, &alg->stats.compress.decompress_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_decompress); + +void crypto_stats_ahash_update(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->stats.hash.err_cnt); + else + atomic64_add(nbytes, &alg->stats.hash.hash_tlen); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ahash_update); + +void crypto_stats_ahash_final(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.hash.err_cnt); + } else { + atomic64_inc(&alg->stats.hash.hash_cnt); + atomic64_add(nbytes, &alg->stats.hash.hash_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ahash_final); + +void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) +{ + if (ret) + atomic64_inc(&alg->stats.kpp.err_cnt); + else + atomic64_inc(&alg->stats.kpp.setsecret_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_kpp_set_secret); + +void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) +{ + if (ret) + atomic64_inc(&alg->stats.kpp.err_cnt); + else + atomic64_inc(&alg->stats.kpp.generate_public_key_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_kpp_generate_public_key); + +void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret) +{ + if (ret) + atomic64_inc(&alg->stats.kpp.err_cnt); + else + atomic64_inc(&alg->stats.kpp.compute_shared_secret_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_kpp_compute_shared_secret); + +void crypto_stats_rng_seed(struct 
crypto_alg *alg, int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->stats.rng.err_cnt); + else + atomic64_inc(&alg->stats.rng.seed_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_rng_seed); + +void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, + int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.rng.err_cnt); + } else { + atomic64_inc(&alg->stats.rng.generate_cnt); + atomic64_add(dlen, &alg->stats.rng.generate_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_rng_generate); + +void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.cipher.err_cnt); + } else { + atomic64_inc(&alg->stats.cipher.encrypt_cnt); + atomic64_add(cryptlen, &alg->stats.cipher.encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_skcipher_encrypt); + +void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->stats.cipher.err_cnt); + } else { + atomic64_inc(&alg->stats.cipher.decrypt_cnt); + atomic64_add(cryptlen, &alg->stats.cipher.decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt); +#endif + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index f93abf13b5d4..c5398bd54942 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -507,23 +507,18 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_blkcipher rblkcipher; - strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type)); - strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "", - sizeof(rblkcipher.geniv)); - rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; + memset(&rblkcipher, 
0, sizeof(rblkcipher)); + + strscpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type)); + strscpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize; rblkcipher.max_keysize = alg->cra_blkcipher.max_keysize; rblkcipher.ivsize = alg->cra_blkcipher.ivsize; - if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, - sizeof(struct crypto_report_blkcipher), &rblkcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, + sizeof(rblkcipher), &rblkcipher); } #else static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) @@ -541,8 +536,7 @@ static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "min keysize : %u\n", alg->cra_blkcipher.min_keysize); seq_printf(m, "max keysize : %u\n", alg->cra_blkcipher.max_keysize); seq_printf(m, "ivsize : %u\n", alg->cra_blkcipher.ivsize); - seq_printf(m, "geniv : %s\n", alg->cra_blkcipher.geniv ?: - ""); + seq_printf(m, "geniv : \n"); } const struct crypto_type crypto_blkcipher_type = { diff --git a/crypto/cfb.c b/crypto/cfb.c index 20987d0e09d8..e81e45673498 100644 --- a/crypto/cfb.c +++ b/crypto/cfb.c @@ -144,7 +144,7 @@ static int crypto_cfb_decrypt_segment(struct skcipher_walk *walk, do { crypto_cfb_encrypt_one(tfm, iv, dst); - crypto_xor(dst, iv, bsize); + crypto_xor(dst, src, bsize); iv = src; src += bsize; diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c deleted file mode 100644 index 3ae96587caf9..000000000000 --- a/crypto/chacha20_generic.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539 - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at 
your option) any later version. - */ - -#include -#include -#include -#include -#include - -static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - /* aligned to potentially speed up crypto_xor() */ - u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long)); - - if (dst != src) - memcpy(dst, src, bytes); - - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block(state, stream); - crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE); - bytes -= CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - } - if (bytes) { - chacha20_block(state, stream); - crypto_xor(dst, stream, bytes); - } -} - -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) -{ - state[0] = 0x61707865; /* "expa" */ - state[1] = 0x3320646e; /* "nd 3" */ - state[2] = 0x79622d32; /* "2-by" */ - state[3] = 0x6b206574; /* "te k" */ - state[4] = ctx->key[0]; - state[5] = ctx->key[1]; - state[6] = ctx->key[2]; - state[7] = ctx->key[3]; - state[8] = ctx->key[4]; - state[9] = ctx->key[5]; - state[10] = ctx->key[6]; - state[11] = ctx->key[7]; - state[12] = get_unaligned_le32(iv + 0); - state[13] = get_unaligned_le32(iv + 4); - state[14] = get_unaligned_le32(iv + 8); - state[15] = get_unaligned_le32(iv + 12); -} -EXPORT_SYMBOL_GPL(crypto_chacha20_init); - -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - int i; - - if (keysize != CHACHA20_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ctx->key); i++) - ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); - -int crypto_chacha20_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_chacha20_init(state, ctx, walk.iv); - - 
while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} -EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, -}; - -static int __init chacha20_generic_mod_init(void) -{ - return crypto_register_skcipher(&alg); -} - -static void __exit chacha20_generic_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(chacha20_generic_mod_init); -module_exit(chacha20_generic_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("chacha20 cipher algorithm"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-generic"); diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 600afa99941f..fef11446ab1b 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,8 +22,6 @@ #include "internal.h" -#define CHACHAPOLY_IV_SIZE 12 - struct chachapoly_instance_ctx { struct crypto_skcipher_spawn chacha; struct crypto_ahash_spawn poly; @@ -51,7 +49,7 @@ struct poly_req { }; struct chacha_req { - u8 iv[CHACHA20_IV_SIZE]; + u8 iv[CHACHA_IV_SIZE]; struct scatterlist src[1]; struct skcipher_request req; /* must be last member */ }; @@ -91,7 +89,7 @@ static void chacha_iv(u8 *iv, struct 
aead_request *req, u32 icb) memcpy(iv, &leicb, sizeof(leicb)); memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen); memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv, - CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen); + CHACHA_IV_SIZE - sizeof(leicb) - ctx->saltlen); } static int poly_verify_tag(struct aead_request *req) @@ -494,7 +492,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, struct chachapoly_ctx *ctx = crypto_aead_ctx(aead); int err; - if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE) + if (keylen != ctx->saltlen + CHACHA_KEY_SIZE) return -EINVAL; keylen -= ctx->saltlen; @@ -639,7 +637,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -EINVAL; /* Need 16-byte IV size, including Initial Block Counter value */ - if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE) + if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE) goto out_drop_chacha; /* Not a stream cipher? */ if (chacha->base.cra_blocksize != 1) diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c new file mode 100644 index 000000000000..35b583101f4f --- /dev/null +++ b/crypto/chacha_generic.c @@ -0,0 +1,217 @@ +/* + * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539) + * + * Copyright (C) 2015 Martin Willi + * Copyright (C) 2018 Google LLC + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/module.h>
+/* dst = src XOR keystream for @bytes bytes; works in place when dst == src. */
+static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
+			   unsigned int bytes, int nrounds)
+{
+	/* keystream block; aligned to potentially speed up crypto_xor() */
+	u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
+
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= CHACHA_BLOCK_SIZE) {
+		chacha_block(state, stream, nrounds);
+		crypto_xor(dst, stream, CHACHA_BLOCK_SIZE);
+		bytes -= CHACHA_BLOCK_SIZE;
+		dst += CHACHA_BLOCK_SIZE;
+	}
+	if (bytes) {	/* tail: a full block is generated, only @bytes are used */
+		chacha_block(state, stream, nrounds);
+		crypto_xor(dst, stream, bytes);
+	}
+}
+/* Walk @req and XOR every chunk with the stream set up from @ctx and @iv. */
+static int chacha_stream_xor(struct skcipher_request *req,
+			     struct chacha_ctx *ctx, u8 *iv)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	crypto_chacha_init(state, ctx, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)	/* more data follows this chunk */
+			nbytes = round_down(nbytes, walk.stride);
+
+		chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+			       nbytes, ctx->nrounds);
+		/* bytes trimmed by round_down() are handed back to the walk */
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+/* Fill the 16-word state: 4 constants, 8 key words, 4 LE words of @iv. */
+void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv)
+{
+	state[0] = 0x61707865; /* "expa" */
+	state[1] = 0x3320646e; /* "nd 3" */
+	state[2] = 0x79622d32; /* "2-by" */
+	state[3] = 0x6b206574; /* "te k" */
+	state[4] = ctx->key[0];
+	state[5] = ctx->key[1];
+	state[6] = ctx->key[2];
+	state[7] = ctx->key[3];
+	state[8] = ctx->key[4];
+	state[9] = ctx->key[5];
+	state[10] = ctx->key[6];
+	state[11] = ctx->key[7];
+	state[12] = get_unaligned_le32(iv + 0);
+	state[13] = get_unaligned_le32(iv + 4);
+	state[14] = get_unaligned_le32(iv + 8);
+	state[15] = get_unaligned_le32(iv + 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_init);
+/* Store a CHACHA_KEY_SIZE key as LE words plus @nrounds; else -EINVAL. */
+static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keysize, int nrounds)
+{
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int i;
+
+	if (keysize != CHACHA_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	ctx->nrounds = nrounds;
+	return 0;
+}
+/* ->setkey() for the 20-round algorithms. */
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 20);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
+/* ->setkey() for the 12-round algorithms. */
+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
+/* ChaCha encrypt/decrypt (same operation); req->iv is used unchanged. */
+int crypto_chacha_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_stream_xor(req, ctx, req->iv);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
+/* XChaCha encrypt/decrypt: derive a subkey, then run the stream with it. */
+int crypto_xchacha_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	/* Compute the subkey given the original key and first 128 nonce bits */
+	crypto_chacha_init(state, ctx, req->iv);
+	hchacha_block(state, subctx.key, ctx->nrounds);
+	subctx.nrounds = ctx->nrounds;
+
+	/* Build the real IV */
+	memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
+	memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
+
+	/* Generate the stream and XOR it with the data */
+	return chacha_stream_xor(req, &subctx, real_iv);
+}
+EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
+/* One entry per algorithm; encrypt and decrypt share a handler in each. */
+static struct skcipher_alg algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-generic",
+		.base.cra_priority	= 100,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha20_setkey,
+		.encrypt		= crypto_chacha_crypt,
+		.decrypt		= crypto_chacha_crypt,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-generic",
+		.base.cra_priority	= 100,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha20_setkey,
+		.encrypt		= crypto_xchacha_crypt,
+		.decrypt		= crypto_xchacha_crypt,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-generic",
+		.base.cra_priority	= 100,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= crypto_chacha12_setkey,
+		.encrypt		= crypto_xchacha_crypt,
+		.decrypt		= crypto_xchacha_crypt,
+	}
+};
+/* Module load: register every entry of algs[]. */
+static int __init chacha_generic_mod_init(void)
+{
+	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+/* Module unload: unregister every entry of algs[]. */
+static void __exit chacha_generic_mod_fini(void)
+{
+	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_generic_mod_init);
+module_exit(chacha_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-generic");
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 7118fb5efbaa..5640e5db7bdb 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -422,8 +422,6 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
 	inst->alg.cra_ablkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
inst->alg.cra_ablkcipher.max_keysize = alg->cra_blkcipher.max_keysize; - inst->alg.cra_ablkcipher.geniv = alg->cra_blkcipher.geniv; - inst->alg.cra_ctxsize = sizeof(struct cryptd_blkcipher_ctx); inst->alg.cra_init = cryptd_blkcipher_init_tfm; @@ -1174,7 +1172,7 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, return ERR_PTR(-EINVAL); type = crypto_skcipher_type(type); mask &= ~CRYPTO_ALG_TYPE_MASK; - mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK); + mask |= CRYPTO_ALG_TYPE_BLKCIPHER_MASK; tfm = crypto_alloc_base(cryptd_alg_name, type, mask); if (IS_ERR(tfm)) return ERR_CAST(tfm); diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c index 784748dbb19f..f25d3f32c9c2 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user_base.c @@ -84,87 +84,38 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); + memset(&rcipher, 0, sizeof(rcipher)); + + strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; rcipher.max_keysize = alg->cra_cipher.cia_max_keysize; - if (nla_put(skb, CRYPTOCFGA_REPORT_CIPHER, - sizeof(struct crypto_report_cipher), &rcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_CIPHER, + sizeof(rcipher), &rcipher); } static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - strncpy(rcomp.type, "compression", sizeof(rcomp.type)); - if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, - sizeof(struct crypto_report_comp), &rcomp)) - goto nla_put_failure; - return 0; + memset(&rcomp, 0, sizeof(rcomp)); -nla_put_failure: - return -EMSGSIZE; -} + strscpy(rcomp.type, "compression", sizeof(rcomp.type)); -static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) 
-{ - struct crypto_report_acomp racomp; - - strncpy(racomp.type, "acomp", sizeof(racomp.type)); - - if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, - sizeof(struct crypto_report_acomp), &racomp)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_report_akcipher rakcipher; - - strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); - - if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, - sizeof(struct crypto_report_akcipher), &rakcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_report_kpp rkpp; - - strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); - - if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, - sizeof(struct crypto_report_kpp), &rkpp)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(rcomp), &rcomp); } static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strncpy(ualg->cru_driver_name, alg->cra_driver_name, + memset(ualg, 0, sizeof(*ualg)); + + strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strscpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strncpy(ualg->cru_module_name, module_name(alg->cra_module), + strscpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -177,9 +128,9 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - strncpy(rl.type, "larval", sizeof(rl.type)); - if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, - sizeof(struct crypto_report_larval), &rl)) + memset(&rl, 0, sizeof(rl)); + strscpy(rl.type, "larval", 
sizeof(rl.type)); + if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(rl), &rl)) goto nla_put_failure; goto out; } @@ -202,20 +153,6 @@ static int crypto_report_one(struct crypto_alg *alg, goto nla_put_failure; break; - case CRYPTO_ALG_TYPE_ACOMPRESS: - if (crypto_report_acomp(skb, alg)) - goto nla_put_failure; - - break; - case CRYPTO_ALG_TYPE_AKCIPHER: - if (crypto_report_akcipher(skb, alg)) - goto nla_put_failure; - - break; - case CRYPTO_ALG_TYPE_KPP: - if (crypto_report_kpp(skb, alg)) - goto nla_put_failure; - break; } out: @@ -294,30 +231,33 @@ drop_alg: static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) { - struct crypto_alg *alg; + const size_t start_pos = cb->args[0]; + size_t pos = 0; struct crypto_dump_info info; - int err; - - if (cb->args[0]) - goto out; - - cb->args[0] = 1; + struct crypto_alg *alg; + int res; info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; + down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) { - err = crypto_report_alg(alg, &info); - if (err) - goto out_err; + if (pos >= start_pos) { + res = crypto_report_alg(alg, &info); + if (res == -EMSGSIZE) + break; + if (res) + goto out; + } + pos++; } - + cb->args[0] = pos; + res = skb->len; out: - return skb->len; -out_err: - return err; + up_read(&crypto_alg_sem); + return res; } static int crypto_dump_report_done(struct netlink_callback *cb) @@ -483,9 +423,7 @@ static const struct crypto_link { .dump = crypto_dump_report, .done = crypto_dump_report_done}, [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng }, - [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat, - .dump = crypto_dump_reportstat, - .done = crypto_dump_reportstat_done}, + [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat}, }; static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -505,7 +443,7 @@ static int crypto_user_rcv_msg(struct 
sk_buff *skb, struct nlmsghdr *nlh, if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) && (nlh->nlmsg_flags & NLM_F_DUMP))) { struct crypto_alg *alg; - u16 dump_alloc = 0; + unsigned long dump_alloc = 0; if (link->dump == NULL) return -EINVAL; @@ -513,16 +451,16 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) dump_alloc += CRYPTO_REPORT_MAXSIZE; + up_read(&crypto_alg_sem); { struct netlink_dump_control c = { .dump = link->dump, .done = link->done, - .min_dump_alloc = dump_alloc, + .min_dump_alloc = min(dump_alloc, 65535UL), }; err = netlink_dump_start(crypto_nlsk, skb, nlh, &c); } - up_read(&crypto_alg_sem); return err; } diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c index 1dfaa0ccd555..3e9a53233d80 100644 --- a/crypto/crypto_user_stat.c +++ b/crypto/crypto_user_stat.c @@ -33,260 +33,149 @@ struct crypto_dump_info { static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat raead; - u64 v64; - u32 v32; + struct crypto_stat_aead raead; memset(&raead, 0, sizeof(raead)); - strncpy(raead.type, "aead", sizeof(raead.type)); + strscpy(raead.type, "aead", sizeof(raead.type)); - v32 = atomic_read(&alg->encrypt_cnt); - raead.stat_encrypt_cnt = v32; - v64 = atomic64_read(&alg->encrypt_tlen); - raead.stat_encrypt_tlen = v64; - v32 = atomic_read(&alg->decrypt_cnt); - raead.stat_decrypt_cnt = v32; - v64 = atomic64_read(&alg->decrypt_tlen); - raead.stat_decrypt_tlen = v64; - v32 = atomic_read(&alg->aead_err_cnt); - raead.stat_aead_err_cnt = v32; + raead.stat_encrypt_cnt = atomic64_read(&alg->stats.aead.encrypt_cnt); + raead.stat_encrypt_tlen = atomic64_read(&alg->stats.aead.encrypt_tlen); + raead.stat_decrypt_cnt = atomic64_read(&alg->stats.aead.decrypt_cnt); + raead.stat_decrypt_tlen = atomic64_read(&alg->stats.aead.decrypt_tlen); + raead.stat_err_cnt = atomic64_read(&alg->stats.aead.err_cnt); - if (nla_put(skb, 
CRYPTOCFGA_STAT_AEAD, - sizeof(struct crypto_stat), &raead)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead); } static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat rcipher; - u64 v64; - u32 v32; + struct crypto_stat_cipher rcipher; memset(&rcipher, 0, sizeof(rcipher)); - strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); + strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - v32 = atomic_read(&alg->encrypt_cnt); - rcipher.stat_encrypt_cnt = v32; - v64 = atomic64_read(&alg->encrypt_tlen); - rcipher.stat_encrypt_tlen = v64; - v32 = atomic_read(&alg->decrypt_cnt); - rcipher.stat_decrypt_cnt = v32; - v64 = atomic64_read(&alg->decrypt_tlen); - rcipher.stat_decrypt_tlen = v64; - v32 = atomic_read(&alg->cipher_err_cnt); - rcipher.stat_cipher_err_cnt = v32; + rcipher.stat_encrypt_cnt = atomic64_read(&alg->stats.cipher.encrypt_cnt); + rcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.cipher.encrypt_tlen); + rcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.cipher.decrypt_cnt); + rcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.cipher.decrypt_tlen); + rcipher.stat_err_cnt = atomic64_read(&alg->stats.cipher.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER, - sizeof(struct crypto_stat), &rcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); } static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat rcomp; - u64 v64; - u32 v32; + struct crypto_stat_compress rcomp; memset(&rcomp, 0, sizeof(rcomp)); - strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); - v32 = atomic_read(&alg->compress_cnt); - rcomp.stat_compress_cnt = v32; - v64 = atomic64_read(&alg->compress_tlen); - rcomp.stat_compress_tlen = v64; - v32 = atomic_read(&alg->decompress_cnt); - 
rcomp.stat_decompress_cnt = v32; - v64 = atomic64_read(&alg->decompress_tlen); - rcomp.stat_decompress_tlen = v64; - v32 = atomic_read(&alg->cipher_err_cnt); - rcomp.stat_compress_err_cnt = v32; + strscpy(rcomp.type, "compression", sizeof(rcomp.type)); + rcomp.stat_compress_cnt = atomic64_read(&alg->stats.compress.compress_cnt); + rcomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen); + rcomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt); + rcomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen); + rcomp.stat_err_cnt = atomic64_read(&alg->stats.compress.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, - sizeof(struct crypto_stat), &rcomp)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp); } static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat racomp; - u64 v64; - u32 v32; + struct crypto_stat_compress racomp; memset(&racomp, 0, sizeof(racomp)); - strlcpy(racomp.type, "acomp", sizeof(racomp.type)); - v32 = atomic_read(&alg->compress_cnt); - racomp.stat_compress_cnt = v32; - v64 = atomic64_read(&alg->compress_tlen); - racomp.stat_compress_tlen = v64; - v32 = atomic_read(&alg->decompress_cnt); - racomp.stat_decompress_cnt = v32; - v64 = atomic64_read(&alg->decompress_tlen); - racomp.stat_decompress_tlen = v64; - v32 = atomic_read(&alg->cipher_err_cnt); - racomp.stat_compress_err_cnt = v32; + strscpy(racomp.type, "acomp", sizeof(racomp.type)); + racomp.stat_compress_cnt = atomic64_read(&alg->stats.compress.compress_cnt); + racomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen); + racomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt); + racomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen); + racomp.stat_err_cnt = atomic64_read(&alg->stats.compress.err_cnt); - if 
(nla_put(skb, CRYPTOCFGA_STAT_ACOMP, - sizeof(struct crypto_stat), &racomp)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp); } static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat rakcipher; - u64 v64; - u32 v32; + struct crypto_stat_akcipher rakcipher; memset(&rakcipher, 0, sizeof(rakcipher)); - strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); - v32 = atomic_read(&alg->encrypt_cnt); - rakcipher.stat_encrypt_cnt = v32; - v64 = atomic64_read(&alg->encrypt_tlen); - rakcipher.stat_encrypt_tlen = v64; - v32 = atomic_read(&alg->decrypt_cnt); - rakcipher.stat_decrypt_cnt = v32; - v64 = atomic64_read(&alg->decrypt_tlen); - rakcipher.stat_decrypt_tlen = v64; - v32 = atomic_read(&alg->sign_cnt); - rakcipher.stat_sign_cnt = v32; - v32 = atomic_read(&alg->verify_cnt); - rakcipher.stat_verify_cnt = v32; - v32 = atomic_read(&alg->akcipher_err_cnt); - rakcipher.stat_akcipher_err_cnt = v32; + strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + rakcipher.stat_encrypt_cnt = atomic64_read(&alg->stats.akcipher.encrypt_cnt); + rakcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.akcipher.encrypt_tlen); + rakcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.akcipher.decrypt_cnt); + rakcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.akcipher.decrypt_tlen); + rakcipher.stat_sign_cnt = atomic64_read(&alg->stats.akcipher.sign_cnt); + rakcipher.stat_verify_cnt = atomic64_read(&alg->stats.akcipher.verify_cnt); + rakcipher.stat_err_cnt = atomic64_read(&alg->stats.akcipher.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, - sizeof(struct crypto_stat), &rakcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, + sizeof(rakcipher), &rakcipher); } static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { - struct 
crypto_stat rkpp; - u32 v; + struct crypto_stat_kpp rkpp; memset(&rkpp, 0, sizeof(rkpp)); - strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); + strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); - v = atomic_read(&alg->setsecret_cnt); - rkpp.stat_setsecret_cnt = v; - v = atomic_read(&alg->generate_public_key_cnt); - rkpp.stat_generate_public_key_cnt = v; - v = atomic_read(&alg->compute_shared_secret_cnt); - rkpp.stat_compute_shared_secret_cnt = v; - v = atomic_read(&alg->kpp_err_cnt); - rkpp.stat_kpp_err_cnt = v; + rkpp.stat_setsecret_cnt = atomic64_read(&alg->stats.kpp.setsecret_cnt); + rkpp.stat_generate_public_key_cnt = atomic64_read(&alg->stats.kpp.generate_public_key_cnt); + rkpp.stat_compute_shared_secret_cnt = atomic64_read(&alg->stats.kpp.compute_shared_secret_cnt); + rkpp.stat_err_cnt = atomic64_read(&alg->stats.kpp.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_KPP, - sizeof(struct crypto_stat), &rkpp)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp); } static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat rhash; - u64 v64; - u32 v32; + struct crypto_stat_hash rhash; memset(&rhash, 0, sizeof(rhash)); - strncpy(rhash.type, "ahash", sizeof(rhash.type)); + strscpy(rhash.type, "ahash", sizeof(rhash.type)); - v32 = atomic_read(&alg->hash_cnt); - rhash.stat_hash_cnt = v32; - v64 = atomic64_read(&alg->hash_tlen); - rhash.stat_hash_tlen = v64; - v32 = atomic_read(&alg->hash_err_cnt); - rhash.stat_hash_err_cnt = v32; + rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt); + rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen); + rhash.stat_err_cnt = atomic64_read(&alg->stats.hash.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_HASH, - sizeof(struct crypto_stat), &rhash)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } static 
int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat rhash; - u64 v64; - u32 v32; + struct crypto_stat_hash rhash; memset(&rhash, 0, sizeof(rhash)); - strncpy(rhash.type, "shash", sizeof(rhash.type)); + strscpy(rhash.type, "shash", sizeof(rhash.type)); - v32 = atomic_read(&alg->hash_cnt); - rhash.stat_hash_cnt = v32; - v64 = atomic64_read(&alg->hash_tlen); - rhash.stat_hash_tlen = v64; - v32 = atomic_read(&alg->hash_err_cnt); - rhash.stat_hash_err_cnt = v32; + rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt); + rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen); + rhash.stat_err_cnt = atomic64_read(&alg->stats.hash.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_HASH, - sizeof(struct crypto_stat), &rhash)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg) { - struct crypto_stat rrng; - u64 v64; - u32 v32; + struct crypto_stat_rng rrng; memset(&rrng, 0, sizeof(rrng)); - strncpy(rrng.type, "rng", sizeof(rrng.type)); + strscpy(rrng.type, "rng", sizeof(rrng.type)); - v32 = atomic_read(&alg->generate_cnt); - rrng.stat_generate_cnt = v32; - v64 = atomic64_read(&alg->generate_tlen); - rrng.stat_generate_tlen = v64; - v32 = atomic_read(&alg->seed_cnt); - rrng.stat_seed_cnt = v32; - v32 = atomic_read(&alg->hash_err_cnt); - rrng.stat_rng_err_cnt = v32; + rrng.stat_generate_cnt = atomic64_read(&alg->stats.rng.generate_cnt); + rrng.stat_generate_tlen = atomic64_read(&alg->stats.rng.generate_tlen); + rrng.stat_seed_cnt = atomic64_read(&alg->stats.rng.seed_cnt); + rrng.stat_err_cnt = atomic64_read(&alg->stats.rng.err_cnt); - if (nla_put(skb, CRYPTOCFGA_STAT_RNG, - sizeof(struct crypto_stat), &rrng)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng); } static 
int crypto_reportstat_one(struct crypto_alg *alg, @@ -295,10 +184,10 @@ static int crypto_reportstat_one(struct crypto_alg *alg, { memset(ualg, 0, sizeof(*ualg)); - strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strlcpy(ualg->cru_driver_name, alg->cra_driver_name, + strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strscpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strlcpy(ualg->cru_module_name, module_name(alg->cra_module), + strscpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -309,12 +198,11 @@ static int crypto_reportstat_one(struct crypto_alg *alg, if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority)) goto nla_put_failure; if (alg->cra_flags & CRYPTO_ALG_LARVAL) { - struct crypto_stat rl; + struct crypto_stat_larval rl; memset(&rl, 0, sizeof(rl)); - strlcpy(rl.type, "larval", sizeof(rl.type)); - if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, - sizeof(struct crypto_stat), &rl)) + strscpy(rl.type, "larval", sizeof(rl.type)); + if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(rl), &rl)) goto nla_put_failure; goto out; } @@ -448,37 +336,4 @@ drop_alg: return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } -int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct crypto_alg *alg; - struct crypto_dump_info info; - int err; - - if (cb->args[0]) - goto out; - - cb->args[0] = 1; - - info.in_skb = cb->skb; - info.out_skb = skb; - info.nlmsg_seq = cb->nlh->nlmsg_seq; - info.nlmsg_flags = NLM_F_MULTI; - - list_for_each_entry(alg, &crypto_alg_list, cra_list) { - err = crypto_reportstat_alg(alg, &info); - if (err) - goto out_err; - } - -out: - return skb->len; -out_err: - return err; -} - -int crypto_dump_reportstat_done(struct netlink_callback *cb) -{ - return 0; -} - MODULE_LICENSE("GPL"); diff --git a/crypto/ctr.c b/crypto/ctr.c index 435b75bd619e..30f3946efc6d 100644 --- a/crypto/ctr.c 
+++ b/crypto/ctr.c @@ -233,8 +233,6 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb) inst->alg.cra_blkcipher.encrypt = crypto_ctr_crypt; inst->alg.cra_blkcipher.decrypt = crypto_ctr_crypt; - inst->alg.cra_blkcipher.geniv = "chainiv"; - out: crypto_mod_put(alg); return inst; diff --git a/crypto/ecc.c b/crypto/ecc.c index 8facafd67802..ed1237115066 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -842,15 +842,23 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, static void ecc_point_mult(struct ecc_point *result, const struct ecc_point *point, const u64 *scalar, - u64 *initial_z, u64 *curve_prime, + u64 *initial_z, const struct ecc_curve *curve, unsigned int ndigits) { /* R0 and R1 */ u64 rx[2][ECC_MAX_DIGITS]; u64 ry[2][ECC_MAX_DIGITS]; u64 z[ECC_MAX_DIGITS]; + u64 sk[2][ECC_MAX_DIGITS]; + u64 *curve_prime = curve->p; int i, nb; - int num_bits = vli_num_bits(scalar, ndigits); + int num_bits; + int carry; + + carry = vli_add(sk[0], scalar, curve->n, ndigits); + vli_add(sk[1], sk[0], curve->n, ndigits); + scalar = sk[!carry]; + num_bits = sizeof(u64) * ndigits * 8 + 1; vli_set(rx[1], point->x, ndigits); vli_set(ry[1], point->y, ndigits); @@ -904,28 +912,41 @@ static inline void ecc_swap_digits(const u64 *in, u64 *out, out[i] = __swab64(in[ndigits - 1 - i]); } +static int __ecc_is_key_valid(const struct ecc_curve *curve, + const u64 *private_key, unsigned int ndigits) +{ + u64 one[ECC_MAX_DIGITS] = { 1, }; + u64 res[ECC_MAX_DIGITS]; + + if (!private_key) + return -EINVAL; + + if (curve->g.ndigits != ndigits) + return -EINVAL; + + /* Make sure the private key is in the range [2, n-3]. 
*/ + if (vli_cmp(one, private_key, ndigits) != -1) + return -EINVAL; + vli_sub(res, curve->n, one, ndigits); + vli_sub(res, res, one, ndigits); + if (vli_cmp(res, private_key, ndigits) != 1) + return -EINVAL; + + return 0; +} + int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, const u64 *private_key, unsigned int private_key_len) { int nbytes; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key) - return -EINVAL; - nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; if (private_key_len != nbytes) return -EINVAL; - if (vli_is_zero(private_key, ndigits)) - return -EINVAL; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve->n, private_key, ndigits) != 1) - return -EINVAL; - - return 0; + return __ecc_is_key_valid(curve, private_key, ndigits); } /* @@ -971,11 +992,8 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) if (err) return err; - if (vli_is_zero(priv, ndigits)) - return -EINVAL; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve->n, priv, ndigits) != 1) + /* Make sure the private key is in the valid range. 
*/ + if (__ecc_is_key_valid(curve, priv, ndigits)) return -EINVAL; ecc_swap_digits(priv, privkey, ndigits); @@ -1004,7 +1022,7 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits, goto out; } - ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits); + ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits); if (ecc_point_is_zero(pk)) { ret = -EAGAIN; goto err_free_point; @@ -1090,7 +1108,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, goto err_alloc_product; } - ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits); + ecc_point_mult(product, pk, priv, rand_z, curve, ndigits); ecc_swap_digits(product->x, secret, ndigits); diff --git a/crypto/hash_info.c b/crypto/hash_info.c index 7b1e0b188ce6..1dd095e4b451 100644 --- a/crypto/hash_info.c +++ b/crypto/hash_info.c @@ -32,6 +32,8 @@ const char *const hash_algo_name[HASH_ALGO__LAST] = { [HASH_ALGO_TGR_160] = "tgr160", [HASH_ALGO_TGR_192] = "tgr192", [HASH_ALGO_SM3_256] = "sm3-256", + [HASH_ALGO_STREEBOG_256] = "streebog256", + [HASH_ALGO_STREEBOG_512] = "streebog512", }; EXPORT_SYMBOL_GPL(hash_algo_name); @@ -54,5 +56,7 @@ const int hash_digest_size[HASH_ALGO__LAST] = { [HASH_ALGO_TGR_160] = TGR160_DIGEST_SIZE, [HASH_ALGO_TGR_192] = TGR192_DIGEST_SIZE, [HASH_ALGO_SM3_256] = SM3256_DIGEST_SIZE, + [HASH_ALGO_STREEBOG_256] = STREEBOG256_DIGEST_SIZE, + [HASH_ALGO_STREEBOG_512] = STREEBOG512_DIGEST_SIZE, }; EXPORT_SYMBOL_GPL(hash_digest_size); diff --git a/crypto/kpp.c b/crypto/kpp.c index a90edc27af77..bc2f1006a2f7 100644 --- a/crypto/kpp.c +++ b/crypto/kpp.c @@ -30,15 +30,11 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; - strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); + memset(&rkpp, 0, sizeof(rkpp)); - if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, - sizeof(struct crypto_report_kpp), &rkpp)) - goto nla_put_failure; - return 0; + strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); -nla_put_failure: - return 
-EMSGSIZE;
+	return nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(rkpp), &rkpp);
 }
 #else
 static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d3519..c160dfdbf2e0 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -122,7 +122,6 @@ static struct crypto_alg alg_lz4 = {
 	.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
 	.cra_ctxsize = sizeof(struct lz4_ctx),
 	.cra_module = THIS_MODULE,
-	.cra_list = LIST_HEAD_INIT(alg_lz4.cra_list),
 	.cra_init = lz4_init,
 	.cra_exit = lz4_exit,
 	.cra_u = { .compress = {
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 2be14f054daf..583b5e013d7a 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -123,7 +123,6 @@ static struct crypto_alg alg_lz4hc = {
 	.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
 	.cra_ctxsize = sizeof(struct lz4hc_ctx),
 	.cra_module = THIS_MODULE,
-	.cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list),
 	.cra_init = lz4hc_init,
 	.cra_exit = lz4hc_exit,
 	.cra_u = { .compress = {
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
new file mode 100644
index 000000000000..ec831a5594d8
--- /dev/null
+++ b/crypto/nhpoly1305.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * "NHPoly1305" is the main component of Adiantum hashing.
+ * Specifically, it is the calculation
+ *
+ *	H_L ← Poly1305_{K_L}(NH_{K_N}(pad_{128}(L)))
+ *
+ * from the procedure in section 6.4 of the Adiantum paper [1].  It is an
+ * ε-almost-∆-universal (ε-∆U) hash function for equal-length inputs over
+ * Z/(2^{128}Z), where the "∆" operation is addition.  It hashes 1024-byte
+ * chunks of the input with the NH hash function [2], reducing the input length
+ * by 32x.  The resulting NH digests are evaluated as a polynomial in
+ * GF(2^{130}-5), like in the Poly1305 MAC [3].  Note that the polynomial
+ * evaluation by itself would suffice to achieve the ε-∆U property; NH is used
+ * for performance since it's over twice as fast as Poly1305.
+ *
+ * This is *not* a cryptographic hash function; do not use it as such!
+ *
+ * [1] Adiantum: length-preserving encryption for entry-level processors
+ *     (https://eprint.iacr.org/2018/720.pdf)
+ * [2] UMAC: Fast and Secure Message Authentication
+ *     (https://fastcrypto.org/umac/umac_proc.pdf)
+ * [3] The Poly1305-AES message-authentication code
+ *     (https://cr.yp.to/mac/poly1305-20050329.pdf)
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+/* Portable NH: accumulates NH_NUM_PASSES 64-bit sums, stored little-endian. */
+static void nh_generic(const u32 *key, const u8 *message, size_t message_len,
+		       __le64 hash[NH_NUM_PASSES])
+{
+	u64 sums[4] = { 0, 0, 0, 0 };
+
+	BUILD_BUG_ON(NH_PAIR_STRIDE != 2);
+	BUILD_BUG_ON(NH_NUM_PASSES != 4);
+
+	while (message_len) {	/* ends only at 0: length must be a unit multiple */
+		u32 m0 = get_unaligned_le32(message + 0);
+		u32 m1 = get_unaligned_le32(message + 4);
+		u32 m2 = get_unaligned_le32(message + 8);
+		u32 m3 = get_unaligned_le32(message + 12);
+
+		sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]);
+		sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]);
+		sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]);
+		sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]);
+		sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]);
+		sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]);
+		sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]);
+		sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]);
+		key += NH_MESSAGE_UNIT / sizeof(key[0]);
+		message += NH_MESSAGE_UNIT;
+		message_len -= NH_MESSAGE_UNIT;
+	}
+
+	hash[0] = cpu_to_le64(sums[0]);
+	hash[1] = cpu_to_le64(sums[1]);
+	hash[2] = cpu_to_le64(sums[2]);
+	hash[3] = cpu_to_le64(sums[3]);
+}
+
+/* Pass the next NH hash value through Poly1305 */
+static void process_nh_hash_value(struct nhpoly1305_state *state,
+				  const struct nhpoly1305_key *key)
+{
+	BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
+
+	poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
+			     NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
+}
+
+/*
+ * Feed the next portion of the source data, as a whole number of 16-byte
+ * "NH message units", through NH and Poly1305.  Each NH hash is taken over
+ * 1024 bytes, except possibly the final one which is taken over a multiple of
+ * 16 bytes up to 1024.  Also, in the case where data is passed in misaligned
+ * chunks, we combine partial hashes; the end result is the same either way.
+ */
+static void nhpoly1305_units(struct nhpoly1305_state *state,
+			     const struct nhpoly1305_key *key,
+			     const u8 *src, unsigned int srclen, nh_t nh_fn)
+{
+	do {
+		unsigned int bytes;
+
+		if (state->nh_remaining == 0) {
+			/* Starting a new NH message */
+			bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES);
+			nh_fn(key->nh_key, src, bytes, state->nh_hash);
+			state->nh_remaining = NH_MESSAGE_BYTES - bytes;
+		} else {
+			/* Continuing a previous NH message */
+			__le64 tmp_hash[NH_NUM_PASSES];
+			unsigned int pos;
+			int i;
+
+			pos = NH_MESSAGE_BYTES - state->nh_remaining;
+			bytes = min(srclen, state->nh_remaining);
+			/* pos is in bytes; nh_key is indexed in 32-bit words */
+			nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash);
+			for (i = 0; i < NH_NUM_PASSES; i++)
+				le64_add_cpu(&state->nh_hash[i],
+					     le64_to_cpu(tmp_hash[i]));
+			state->nh_remaining -= bytes;
+		}
+		if (state->nh_remaining == 0)
+			process_nh_hash_value(state, key);
+		src += bytes;
+		srclen -= bytes;
+	} while (srclen);
+}
+/* Key = Poly1305 key (POLY1305_BLOCK_SIZE bytes) + NH_KEY_WORDS LE words. */
+int crypto_nhpoly1305_setkey(struct crypto_shash *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm);
+	int i;
+
+	if (keylen != NHPOLY1305_KEY_SIZE)
+		return -EINVAL;
+
+	poly1305_core_setkey(&ctx->poly_key, key);
+	key += POLY1305_BLOCK_SIZE;
+
+	for (i = 0; i < NH_KEY_WORDS; i++)
+		ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_setkey);
+/* Start a new hash: fresh Poly1305 state, nothing buffered or pending. */
+int crypto_nhpoly1305_init(struct shash_desc *desc)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+
+	poly1305_core_init(&state->poly_state);
+	state->buflen = 0;
+	state->nh_remaining = 0;
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_init);
+/* Hash whole NH_MESSAGE_UNITs via @nh_fn; buffer any sub-unit remainder. */
+int crypto_nhpoly1305_update_helper(struct shash_desc *desc,
+				    const u8 *src, unsigned int srclen,
+				    nh_t nh_fn)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+	const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+	unsigned int bytes;
+
+	if (state->buflen) {	/* top up the partially filled unit first */
+		bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen);
+		memcpy(&state->buffer[state->buflen], src, bytes);
+		state->buflen += bytes;
+		if (state->buflen < NH_MESSAGE_UNIT)
+			return 0;
+		nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+				 nh_fn);
+		state->buflen = 0;
+		src += bytes;
+		srclen -= bytes;
+	}
+
+	if (srclen >= NH_MESSAGE_UNIT) {
+		bytes = round_down(srclen, NH_MESSAGE_UNIT);
+		nhpoly1305_units(state, key, src, bytes, nh_fn);
+		src += bytes;
+		srclen -= bytes;
+	}
+
+	if (srclen) {	/* fewer than NH_MESSAGE_UNIT bytes left: keep for later */
+		memcpy(state->buffer, src, srclen);
+		state->buflen = srclen;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update_helper);
+/* shash ->update() using the portable nh_generic(). */
+int crypto_nhpoly1305_update(struct shash_desc *desc,
+			     const u8 *src, unsigned int srclen)
+{
+	return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update);
+/* Zero-pad and flush pending data, then write the Poly1305 result to @dst. */
+int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
+{
+	struct nhpoly1305_state *state = shash_desc_ctx(desc);
+	const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+
+	if (state->buflen) {
+		memset(&state->buffer[state->buflen], 0,
+		       NH_MESSAGE_UNIT - state->buflen);
+		nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+				 nh_fn);
+	}
+
+	if (state->nh_remaining)	/* a partial NH message is still pending */
+		process_nh_hash_value(state, key);
+
+	poly1305_core_emit(&state->poly_state, dst);
+	return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
+/* shash ->final() using the portable nh_generic(). */
+int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	return crypto_nhpoly1305_final_helper(desc, dst, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final);
+/* shash algorithm entry for the generic implementation. */
+static struct shash_alg nhpoly1305_alg = {
+	.base.cra_name		= "nhpoly1305",
+	.base.cra_driver_name	= "nhpoly1305-generic",
+	.base.cra_priority	= 100,
+	.base.cra_ctxsize	= sizeof(struct nhpoly1305_key),
+	.base.cra_module	= THIS_MODULE,
+	.digestsize		= POLY1305_DIGEST_SIZE,
+	.init			= crypto_nhpoly1305_init,
+	.update			= crypto_nhpoly1305_update,
+	.final			= crypto_nhpoly1305_final,
+	.setkey			= crypto_nhpoly1305_setkey,
+	.descsize		= sizeof(struct nhpoly1305_state),
+};
+/* Module load: register nhpoly1305_alg. */
+static int __init nhpoly1305_mod_init(void)
+{
+	return crypto_register_shash(&nhpoly1305_alg);
+}
+/* Module unload: unregister nhpoly1305_alg. */
+static void __exit nhpoly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-generic");
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 8eb3c4c9ff67..d47cfc47b1b1 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -394,7 +394,7 @@ static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name)
 	int ret;
 
 	pinst->kobj.kset = pcrypt_kset;
-	ret = kobject_add(&pinst->kobj, NULL, name);
+	ret = kobject_add(&pinst->kobj, NULL, "%s", name);
 	if (!ret)
 		kobject_uevent(&pinst->kobj, KOBJ_ADD);
 
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 47d3a6b83931..2a06874204e8 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	memset(dctx->h, 0, sizeof(dctx->h));
+	poly1305_core_init(&dctx->h);
 	dctx->buflen = 0;
 	dctx->rset = false;
 	dctx->sset = false;
@@ -47,23 +47,16 @@ int crypto_poly1305_init(struct shash_desc *desc)
 }
EXPORT_SYMBOL_GPL(crypto_poly1305_init); -static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; - dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; - dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; - dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; - dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; -} - -static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) -{ - dctx->s[0] = get_unaligned_le32(key + 0); - dctx->s[1] = get_unaligned_le32(key + 4); - dctx->s[2] = get_unaligned_le32(key + 8); - dctx->s[3] = get_unaligned_le32(key + 12); + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; } +EXPORT_SYMBOL_GPL(poly1305_core_setkey); /* * Poly1305 requires a unique key for each tag, which implies that we can't set @@ -75,13 +68,16 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, { if (!dctx->sset) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { - poly1305_setrkey(dctx, src); + poly1305_core_setkey(&dctx->r, src); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->rset = true; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_setskey(dctx, src); + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->sset = true; @@ -91,41 +87,37 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx 
*dctx, } EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey); -static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen, - u32 hibit) +static void poly1305_blocks_internal(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks, + u32 hibit) { u32 r0, r1, r2, r3, r4; u32 s1, s2, s3, s4; u32 h0, h1, h2, h3, h4; u64 d0, d1, d2, d3, d4; - unsigned int datalen; - if (unlikely(!dctx->sset)) { - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); - src += srclen - datalen; - srclen = datalen; - } + if (!nblocks) + return; - r0 = dctx->r[0]; - r1 = dctx->r[1]; - r2 = dctx->r[2]; - r3 = dctx->r[3]; - r4 = dctx->r[4]; + r0 = key->r[0]; + r1 = key->r[1]; + r2 = key->r[2]; + r3 = key->r[3]; + r4 = key->r[4]; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; - - while (likely(srclen >= POLY1305_BLOCK_SIZE)) { + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; + do { /* h += m[i] */ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; @@ -154,16 +146,36 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, h1 += h0 >> 26; h0 = h0 & 0x3ffffff; src += POLY1305_BLOCK_SIZE; - srclen -= POLY1305_BLOCK_SIZE; + } while (--nblocks); + + state->h[0] = h0; + state->h[1] = h1; + state->h[2] = h2; + state->h[3] = h3; + state->h[4] = h4; +} + +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks) +{ + poly1305_blocks_internal(state, key, src, nblocks, 1 << 24); +} +EXPORT_SYMBOL_GPL(poly1305_core_blocks); + +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, + const u8 *src, unsigned int srclen, u32 hibit) +{ + unsigned int datalen; + + if (unlikely(!dctx->sset)) { + datalen = 
crypto_poly1305_setdesckey(dctx, src, srclen); + src += srclen - datalen; + srclen = datalen; } - dctx->h[0] = h0; - dctx->h[1] = h1; - dctx->h[2] = h2; - dctx->h[3] = h3; - dctx->h[4] = h4; - - return srclen; + poly1305_blocks_internal(&dctx->h, &dctx->r, + src, srclen / POLY1305_BLOCK_SIZE, hibit); } int crypto_poly1305_update(struct shash_desc *desc, @@ -187,9 +199,9 @@ int crypto_poly1305_update(struct shash_desc *desc, } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = poly1305_blocks(dctx, src, srclen, 1 << 24); - src += srclen - bytes; - srclen = bytes; + poly1305_blocks(dctx, src, srclen, 1 << 24); + src += srclen - (srclen % POLY1305_BLOCK_SIZE); + srclen %= POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { @@ -201,30 +213,18 @@ int crypto_poly1305_update(struct shash_desc *desc, } EXPORT_SYMBOL_GPL(crypto_poly1305_update); -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +void poly1305_core_emit(const struct poly1305_state *state, void *dst) { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); u32 h0, h1, h2, h3, h4; u32 g0, g1, g2, g3, g4; u32 mask; - u64 f = 0; - - if (unlikely(!dctx->sset)) - return -ENOKEY; - - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } /* fully carry h */ - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; @@ -254,16 +254,40 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) h4 = (h4 & mask) | g4; /* h = h % (2^128) */ - h0 = (h0 >> 0) | (h1 << 26); - h1 = (h1 >> 6) | (h2 << 20); - h2 = (h2 >> 12) | (h3 << 14); - h3 = (h3 >> 18) | (h4 << 8); + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); + put_unaligned_le32((h1 >> 6) 
| (h2 << 20), dst + 4); + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); +} +EXPORT_SYMBOL_GPL(poly1305_core_emit); + +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + __le32 digest[4]; + u64 f = 0; + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_core_emit(&dctx->h, digest); /* mac = (h + s) % (2^128) */ - f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst + 0); - f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst + 4); - f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst + 8); - f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12); + f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0]; + put_unaligned_le32(f, dst + 0); + f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1]; + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2]; + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3]; + put_unaligned_le32(f, dst + 12); return 0; } diff --git a/crypto/rng.c b/crypto/rng.c index 547f16ecbfb0..33c38a72bff5 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -35,9 +35,11 @@ static int crypto_default_rng_refcnt; int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { + struct crypto_alg *alg = tfm->base.__crt_alg; u8 *buf = NULL; int err; + crypto_stats_get(alg); if (!seed && slen) { buf = kmalloc(slen, GFP_KERNEL); if (!buf) @@ -50,7 +52,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) } err = crypto_rng_alg(tfm)->seed(tfm, seed, slen); - crypto_stat_rng_seed(tfm, err); + crypto_stats_rng_seed(alg, err); out: kzfree(buf); return err; @@ -74,17 +76,13 @@ static 
int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_rng rrng; - strncpy(rrng.type, "rng", sizeof(rrng.type)); + memset(&rrng, 0, sizeof(rrng)); + + strscpy(rrng.type, "rng", sizeof(rrng.type)); rrng.seedsize = seedsize(alg); - if (nla_put(skb, CRYPTOCFGA_REPORT_RNG, - sizeof(struct crypto_report_rng), &rrng)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_RNG, sizeof(rrng), &rrng); } #else static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index 8c77bc78a09f..00fce32ae17a 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -159,7 +159,7 @@ static int salsa20_crypt(struct skcipher_request *req) u32 state[16]; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); salsa20_init(state, ctx, walk.iv); diff --git a/crypto/scompress.c b/crypto/scompress.c index 968bbcf65c94..6f8305f8c300 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -40,15 +40,12 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rscomp; - strncpy(rscomp.type, "scomp", sizeof(rscomp.type)); + memset(&rscomp, 0, sizeof(rscomp)); - if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, - sizeof(struct crypto_report_comp), &rscomp)) - goto nla_put_failure; - return 0; + strscpy(rscomp.type, "scomp", sizeof(rscomp.type)); -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, + sizeof(rscomp), &rscomp); } #else static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/shash.c b/crypto/shash.c index d21f04d70dce..44d297b82a8f 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -408,18 +408,14 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_report_hash rhash; struct shash_alg *salg 
= __crypto_shash_alg(alg); - strncpy(rhash.type, "shash", sizeof(rhash.type)); + memset(&rhash, 0, sizeof(rhash)); + + strscpy(rhash.type, "shash", sizeof(rhash.type)); rhash.blocksize = alg->cra_blocksize; rhash.digestsize = salg->digestsize; - if (nla_put(skb, CRYPTOCFGA_REPORT_HASH, - sizeof(struct crypto_report_hash), &rhash)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash); } #else static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 4caab81d2d02..2a969296bc24 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -474,6 +474,8 @@ int skcipher_walk_virt(struct skcipher_walk *walk, { int err; + might_sleep_if(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); + walk->flags &= ~SKCIPHER_WALK_PHYS; err = skcipher_walk_skcipher(walk, req); @@ -577,8 +579,7 @@ static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) if (alg->cra_type == &crypto_blkcipher_type) return sizeof(struct crypto_blkcipher *); - if (alg->cra_type == &crypto_ablkcipher_type || - alg->cra_type == &crypto_givcipher_type) + if (alg->cra_type == &crypto_ablkcipher_type) return sizeof(struct crypto_ablkcipher *); return crypto_alg_extsize(alg); @@ -842,8 +843,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) if (tfm->__crt_alg->cra_type == &crypto_blkcipher_type) return crypto_init_skcipher_ops_blkcipher(tfm); - if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type || - tfm->__crt_alg->cra_type == &crypto_givcipher_type) + if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type) return crypto_init_skcipher_ops_ablkcipher(tfm); skcipher->setkey = skcipher_setkey; @@ -897,21 +897,18 @@ static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg) struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, base); - strncpy(rblkcipher.type, "skcipher", 
sizeof(rblkcipher.type)); - strncpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); + memset(&rblkcipher, 0, sizeof(rblkcipher)); + + strscpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type)); + strscpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = skcipher->min_keysize; rblkcipher.max_keysize = skcipher->max_keysize; rblkcipher.ivsize = skcipher->ivsize; - if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, - sizeof(struct crypto_report_blkcipher), &rblkcipher)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; + return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, + sizeof(rblkcipher), &rblkcipher); } #else static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg) diff --git a/crypto/streebog_generic.c b/crypto/streebog_generic.c new file mode 100644 index 000000000000..03272a22afce --- /dev/null +++ b/crypto/streebog_generic.c @@ -0,0 +1,1140 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-2-Clause +/* + * Streebog hash function as specified by GOST R 34.11-2012 and + * described at https://tools.ietf.org/html/rfc6986 + * + * Copyright (c) 2013 Alexey Degtyarev + * Copyright (c) 2018 Vitaly Chikunov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ */ + +#include +#include +#include +#include + +static const struct streebog_uint512 buffer0 = { { + 0, 0, 0, 0, 0, 0, 0, 0 +} }; + +static const struct streebog_uint512 buffer512 = { { + cpu_to_le64(0x200), 0, 0, 0, 0, 0, 0, 0 +} }; + +static const struct streebog_uint512 C[12] = { + { { + cpu_to_le64(0xdd806559f2a64507ULL), + cpu_to_le64(0x05767436cc744d23ULL), + cpu_to_le64(0xa2422a08a460d315ULL), + cpu_to_le64(0x4b7ce09192676901ULL), + cpu_to_le64(0x714eb88d7585c4fcULL), + cpu_to_le64(0x2f6a76432e45d016ULL), + cpu_to_le64(0xebcb2f81c0657c1fULL), + cpu_to_le64(0xb1085bda1ecadae9ULL) + } }, + { { + cpu_to_le64(0xe679047021b19bb7ULL), + cpu_to_le64(0x55dda21bd7cbcd56ULL), + cpu_to_le64(0x5cb561c2db0aa7caULL), + cpu_to_le64(0x9ab5176b12d69958ULL), + cpu_to_le64(0x61d55e0f16b50131ULL), + cpu_to_le64(0xf3feea720a232b98ULL), + cpu_to_le64(0x4fe39d460f70b5d7ULL), + cpu_to_le64(0x6fa3b58aa99d2f1aULL) + } }, + { { + cpu_to_le64(0x991e96f50aba0ab2ULL), + cpu_to_le64(0xc2b6f443867adb31ULL), + cpu_to_le64(0xc1c93a376062db09ULL), + cpu_to_le64(0xd3e20fe490359eb1ULL), + cpu_to_le64(0xf2ea7514b1297b7bULL), + cpu_to_le64(0x06f15e5f529c1f8bULL), + cpu_to_le64(0x0a39fc286a3d8435ULL), + cpu_to_le64(0xf574dcac2bce2fc7ULL) + } }, + { { + cpu_to_le64(0x220cbebc84e3d12eULL), + cpu_to_le64(0x3453eaa193e837f1ULL), + cpu_to_le64(0xd8b71333935203beULL), + cpu_to_le64(0xa9d72c82ed03d675ULL), + cpu_to_le64(0x9d721cad685e353fULL), + cpu_to_le64(0x488e857e335c3c7dULL), + cpu_to_le64(0xf948e1a05d71e4ddULL), + cpu_to_le64(0xef1fdfb3e81566d2ULL) + } }, + { { + cpu_to_le64(0x601758fd7c6cfe57ULL), + cpu_to_le64(0x7a56a27ea9ea63f5ULL), + cpu_to_le64(0xdfff00b723271a16ULL), + cpu_to_le64(0xbfcd1747253af5a3ULL), + cpu_to_le64(0x359e35d7800fffbdULL), + cpu_to_le64(0x7f151c1f1686104aULL), + cpu_to_le64(0x9a3f410c6ca92363ULL), + cpu_to_le64(0x4bea6bacad474799ULL) + } }, + { { + cpu_to_le64(0xfa68407a46647d6eULL), + cpu_to_le64(0xbf71c57236904f35ULL), + cpu_to_le64(0x0af21f66c2bec6b6ULL), + 
cpu_to_le64(0xcffaa6b71c9ab7b4ULL), + cpu_to_le64(0x187f9ab49af08ec6ULL), + cpu_to_le64(0x2d66c4f95142a46cULL), + cpu_to_le64(0x6fa4c33b7a3039c0ULL), + cpu_to_le64(0xae4faeae1d3ad3d9ULL) + } }, + { { + cpu_to_le64(0x8886564d3a14d493ULL), + cpu_to_le64(0x3517454ca23c4af3ULL), + cpu_to_le64(0x06476983284a0504ULL), + cpu_to_le64(0x0992abc52d822c37ULL), + cpu_to_le64(0xd3473e33197a93c9ULL), + cpu_to_le64(0x399ec6c7e6bf87c9ULL), + cpu_to_le64(0x51ac86febf240954ULL), + cpu_to_le64(0xf4c70e16eeaac5ecULL) + } }, + { { + cpu_to_le64(0xa47f0dd4bf02e71eULL), + cpu_to_le64(0x36acc2355951a8d9ULL), + cpu_to_le64(0x69d18d2bd1a5c42fULL), + cpu_to_le64(0xf4892bcb929b0690ULL), + cpu_to_le64(0x89b4443b4ddbc49aULL), + cpu_to_le64(0x4eb7f8719c36de1eULL), + cpu_to_le64(0x03e7aa020c6e4141ULL), + cpu_to_le64(0x9b1f5b424d93c9a7ULL) + } }, + { { + cpu_to_le64(0x7261445183235adbULL), + cpu_to_le64(0x0e38dc92cb1f2a60ULL), + cpu_to_le64(0x7b2b8a9aa6079c54ULL), + cpu_to_le64(0x800a440bdbb2ceb1ULL), + cpu_to_le64(0x3cd955b7e00d0984ULL), + cpu_to_le64(0x3a7d3a1b25894224ULL), + cpu_to_le64(0x944c9ad8ec165fdeULL), + cpu_to_le64(0x378f5a541631229bULL) + } }, + { { + cpu_to_le64(0x74b4c7fb98459cedULL), + cpu_to_le64(0x3698fad1153bb6c3ULL), + cpu_to_le64(0x7a1e6c303b7652f4ULL), + cpu_to_le64(0x9fe76702af69334bULL), + cpu_to_le64(0x1fffe18a1b336103ULL), + cpu_to_le64(0x8941e71cff8a78dbULL), + cpu_to_le64(0x382ae548b2e4f3f3ULL), + cpu_to_le64(0xabbedea680056f52ULL) + } }, + { { + cpu_to_le64(0x6bcaa4cd81f32d1bULL), + cpu_to_le64(0xdea2594ac06fd85dULL), + cpu_to_le64(0xefbacd1d7d476e98ULL), + cpu_to_le64(0x8a1d71efea48b9caULL), + cpu_to_le64(0x2001802114846679ULL), + cpu_to_le64(0xd8fa6bbbebab0761ULL), + cpu_to_le64(0x3002c6cd635afe94ULL), + cpu_to_le64(0x7bcd9ed0efc889fbULL) + } }, + { { + cpu_to_le64(0x48bc924af11bd720ULL), + cpu_to_le64(0xfaf417d5d9b21b99ULL), + cpu_to_le64(0xe71da4aa88e12852ULL), + cpu_to_le64(0x5d80ef9d1891cc86ULL), + cpu_to_le64(0xf82012d430219f9bULL), + 
cpu_to_le64(0xcda43c32bcdf1d77ULL), + cpu_to_le64(0xd21380b00449b17aULL), + cpu_to_le64(0x378ee767f11631baULL) + } } +}; + +static const u8 Tau[64] = { + 0, 8, 16, 24, 32, 40, 48, 56, + 1, 9, 17, 25, 33, 41, 49, 57, + 2, 10, 18, 26, 34, 42, 50, 58, + 3, 11, 19, 27, 35, 43, 51, 59, + 4, 12, 20, 28, 36, 44, 52, 60, + 5, 13, 21, 29, 37, 45, 53, 61, + 6, 14, 22, 30, 38, 46, 54, 62, + 7, 15, 23, 31, 39, 47, 55, 63 +}; + +static const u8 Pi[256] = { + 252, 238, 221, 17, 207, 110, 49, 22, + 251, 196, 250, 218, 35, 197, 4, 77, + 233, 119, 240, 219, 147, 46, 153, 186, + 23, 54, 241, 187, 20, 205, 95, 193, + 249, 24, 101, 90, 226, 92, 239, 33, + 129, 28, 60, 66, 139, 1, 142, 79, + 5, 132, 2, 174, 227, 106, 143, 160, + 6, 11, 237, 152, 127, 212, 211, 31, + 235, 52, 44, 81, 234, 200, 72, 171, + 242, 42, 104, 162, 253, 58, 206, 204, + 181, 112, 14, 86, 8, 12, 118, 18, + 191, 114, 19, 71, 156, 183, 93, 135, + 21, 161, 150, 41, 16, 123, 154, 199, + 243, 145, 120, 111, 157, 158, 178, 177, + 50, 117, 25, 61, 255, 53, 138, 126, + 109, 84, 198, 128, 195, 189, 13, 87, + 223, 245, 36, 169, 62, 168, 67, 201, + 215, 121, 214, 246, 124, 34, 185, 3, + 224, 15, 236, 222, 122, 148, 176, 188, + 220, 232, 40, 80, 78, 51, 10, 74, + 167, 151, 96, 115, 30, 0, 98, 68, + 26, 184, 56, 130, 100, 159, 38, 65, + 173, 69, 70, 146, 39, 94, 85, 47, + 140, 163, 165, 125, 105, 213, 149, 59, + 7, 88, 179, 64, 134, 172, 29, 247, + 48, 55, 107, 228, 136, 217, 231, 137, + 225, 27, 131, 73, 76, 63, 248, 254, + 141, 83, 170, 144, 202, 216, 133, 97, + 32, 113, 103, 164, 45, 43, 9, 91, + 203, 155, 37, 208, 190, 229, 108, 82, + 89, 166, 116, 210, 230, 244, 180, 192, + 209, 102, 175, 194, 57, 75, 99, 182 +}; + +static const unsigned long long Ax[8][256] = { + { + 0xd01f715b5c7ef8e6ULL, 0x16fa240980778325ULL, 0xa8a42e857ee049c8ULL, + 0x6ac1068fa186465bULL, 0x6e417bd7a2e9320bULL, 0x665c8167a437daabULL, + 0x7666681aa89617f6ULL, 0x4b959163700bdcf5ULL, 0xf14be6b78df36248ULL, + 0xc585bd689a625cffULL, 0x9557d7fca67d82cbULL, 
0x89f0b969af6dd366ULL, + 0xb0833d48749f6c35ULL, 0xa1998c23b1ecbc7cULL, 0x8d70c431ac02a736ULL, + 0xd6dfbc2fd0a8b69eULL, 0x37aeb3e551fa198bULL, 0x0b7d128a40b5cf9cULL, + 0x5a8f2008b5780cbcULL, 0xedec882284e333e5ULL, 0xd25fc177d3c7c2ceULL, + 0x5e0f5d50b61778ecULL, 0x1d873683c0c24cb9ULL, 0xad040bcbb45d208cULL, + 0x2f89a0285b853c76ULL, 0x5732fff6791b8d58ULL, 0x3e9311439ef6ec3fULL, + 0xc9183a809fd3c00fULL, 0x83adf3f5260a01eeULL, 0xa6791941f4e8ef10ULL, + 0x103ae97d0ca1cd5dULL, 0x2ce948121dee1b4aULL, 0x39738421dbf2bf53ULL, + 0x093da2a6cf0cf5b4ULL, 0xcd9847d89cbcb45fULL, 0xf9561c078b2d8ae8ULL, + 0x9c6a755a6971777fULL, 0xbc1ebaa0712ef0c5ULL, 0x72e61542abf963a6ULL, + 0x78bb5fde229eb12eULL, 0x14ba94250fceb90dULL, 0x844d6697630e5282ULL, + 0x98ea08026a1e032fULL, 0xf06bbea144217f5cULL, 0xdb6263d11ccb377aULL, + 0x641c314b2b8ee083ULL, 0x320e96ab9b4770cfULL, 0x1ee7deb986a96b85ULL, + 0xe96cf57a878c47b5ULL, 0xfdd6615f8842feb8ULL, 0xc83862965601dd1bULL, + 0x2ea9f83e92572162ULL, 0xf876441142ff97fcULL, 0xeb2c455608357d9dULL, + 0x5612a7e0b0c9904cULL, 0x6c01cbfb2d500823ULL, 0x4548a6a7fa037a2dULL, + 0xabc4c6bf388b6ef4ULL, 0xbade77d4fdf8bebdULL, 0x799b07c8eb4cac3aULL, + 0x0c9d87e805b19cf0ULL, 0xcb588aac106afa27ULL, 0xea0c1d40c1e76089ULL, + 0x2869354a1e816f1aULL, 0xff96d17307fbc490ULL, 0x9f0a9d602f1a5043ULL, + 0x96373fc6e016a5f7ULL, 0x5292dab8b3a6e41cULL, 0x9b8ae0382c752413ULL, + 0x4f15ec3b7364a8a5ULL, 0x3fb349555724f12bULL, 0xc7c50d4415db66d7ULL, + 0x92b7429ee379d1a7ULL, 0xd37f99611a15dfdaULL, 0x231427c05e34a086ULL, + 0xa439a96d7b51d538ULL, 0xb403401077f01865ULL, 0xdda2aea5901d7902ULL, + 0x0a5d4a9c8967d288ULL, 0xc265280adf660f93ULL, 0x8bb0094520d4e94eULL, + 0x2a29856691385532ULL, 0x42a833c5bf072941ULL, 0x73c64d54622b7eb2ULL, + 0x07e095624504536cULL, 0x8a905153e906f45aULL, 0x6f6123c16b3b2f1fULL, + 0xc6e55552dc097bc3ULL, 0x4468feb133d16739ULL, 0xe211e7f0c7398829ULL, + 0xa2f96419f7879b40ULL, 0x19074bdbc3ad38e9ULL, 0xf4ebc3f9474e0b0cULL, + 0x43886bd376d53455ULL, 0xd8028beb5aa01046ULL, 
0x51f23282f5cdc320ULL, + 0xe7b1c2be0d84e16dULL, 0x081dfab006dee8a0ULL, 0x3b33340d544b857bULL, + 0x7f5bcabc679ae242ULL, 0x0edd37c48a08a6d8ULL, 0x81ed43d9a9b33bc6ULL, + 0xb1a3655ebd4d7121ULL, 0x69a1eeb5e7ed6167ULL, 0xf6ab73d5c8f73124ULL, + 0x1a67a3e185c61fd5ULL, 0x2dc91004d43c065eULL, 0x0240b02c8fb93a28ULL, + 0x90f7f2b26cc0eb8fULL, 0x3cd3a16f114fd617ULL, 0xaae49ea9f15973e0ULL, + 0x06c0cd748cd64e78ULL, 0xda423bc7d5192a6eULL, 0xc345701c16b41287ULL, + 0x6d2193ede4821537ULL, 0xfcf639494190e3acULL, 0x7c3b228621f1c57eULL, + 0xfb16ac2b0494b0c0ULL, 0xbf7e529a3745d7f9ULL, 0x6881b6a32e3f7c73ULL, + 0xca78d2bad9b8e733ULL, 0xbbfe2fc2342aa3a9ULL, 0x0dbddffecc6381e4ULL, + 0x70a6a56e2440598eULL, 0xe4d12a844befc651ULL, 0x8c509c2765d0ba22ULL, + 0xee8c6018c28814d9ULL, 0x17da7c1f49a59e31ULL, 0x609c4c1328e194d3ULL, + 0xb3e3d57232f44b09ULL, 0x91d7aaa4a512f69bULL, 0x0ffd6fd243dabbccULL, + 0x50d26a943c1fde34ULL, 0x6be15e9968545b4fULL, 0x94778fea6faf9fdfULL, + 0x2b09dd7058ea4826ULL, 0x677cd9716de5c7bfULL, 0x49d5214fffb2e6ddULL, + 0x0360e83a466b273cULL, 0x1fc786af4f7b7691ULL, 0xa0b9d435783ea168ULL, + 0xd49f0c035f118cb6ULL, 0x01205816c9d21d14ULL, 0xac2453dd7d8f3d98ULL, + 0x545217cc3f70aa64ULL, 0x26b4028e9489c9c2ULL, 0xdec2469fd6765e3eULL, + 0x04807d58036f7450ULL, 0xe5f17292823ddb45ULL, 0xf30b569b024a5860ULL, + 0x62dcfc3fa758aefbULL, 0xe84cad6c4e5e5aa1ULL, 0xccb81fce556ea94bULL, + 0x53b282ae7a74f908ULL, 0x1b47fbf74c1402c1ULL, 0x368eebf39828049fULL, + 0x7afbeff2ad278b06ULL, 0xbe5e0a8cfe97caedULL, 0xcfd8f7f413058e77ULL, + 0xf78b2bc301252c30ULL, 0x4d555c17fcdd928dULL, 0x5f2f05467fc565f8ULL, + 0x24f4b2a21b30f3eaULL, 0x860dd6bbecb768aaULL, 0x4c750401350f8f99ULL, + 0x0000000000000000ULL, 0xecccd0344d312ef1ULL, 0xb5231806be220571ULL, + 0xc105c030990d28afULL, 0x653c695de25cfd97ULL, 0x159acc33c61ca419ULL, + 0xb89ec7f872418495ULL, 0xa9847693b73254dcULL, 0x58cf90243ac13694ULL, + 0x59efc832f3132b80ULL, 0x5c4fed7c39ae42c4ULL, 0x828dabe3efd81cfaULL, + 0xd13f294d95ace5f2ULL, 0x7d1b7a90e823d86aULL, 
0xb643f03cf849224dULL, + 0x3df3f979d89dcb03ULL, 0x7426d836272f2ddeULL, 0xdfe21e891fa4432aULL, + 0x3a136c1b9d99986fULL, 0xfa36f43dcd46add4ULL, 0xc025982650df35bbULL, + 0x856d3e81aadc4f96ULL, 0xc4a5e57e53b041ebULL, 0x4708168b75ba4005ULL, + 0xaf44bbe73be41aa4ULL, 0x971767d029c4b8e3ULL, 0xb9be9feebb939981ULL, + 0x215497ecd18d9aaeULL, 0x316e7e91dd2c57f3ULL, 0xcef8afe2dad79363ULL, + 0x3853dc371220a247ULL, 0x35ee03c9de4323a3ULL, 0xe6919aa8c456fc79ULL, + 0xe05157dc4880b201ULL, 0x7bdbb7e464f59612ULL, 0x127a59518318f775ULL, + 0x332ecebd52956ddbULL, 0x8f30741d23bb9d1eULL, 0xd922d3fd93720d52ULL, + 0x7746300c61440ae2ULL, 0x25d4eab4d2e2eefeULL, 0x75068020eefd30caULL, + 0x135a01474acaea61ULL, 0x304e268714fe4ae7ULL, 0xa519f17bb283c82cULL, + 0xdc82f6b359cf6416ULL, 0x5baf781e7caa11a8ULL, 0xb2c38d64fb26561dULL, + 0x34ce5bdf17913eb7ULL, 0x5d6fb56af07c5fd0ULL, 0x182713cd0a7f25fdULL, + 0x9e2ac576e6c84d57ULL, 0x9aaab82ee5a73907ULL, 0xa3d93c0f3e558654ULL, + 0x7e7b92aaae48ff56ULL, 0x872d8ead256575beULL, 0x41c8dbfff96c0e7dULL, + 0x99ca5014a3cc1e3bULL, 0x40e883e930be1369ULL, 0x1ca76e95091051adULL, + 0x4e35b42dbab6b5b1ULL, 0x05a0254ecabd6944ULL, 0xe1710fca8152af15ULL, + 0xf22b0e8dcb984574ULL, 0xb763a82a319b3f59ULL, 0x63fca4296e8ab3efULL, + 0x9d4a2d4ca0a36a6bULL, 0xe331bfe60eeb953dULL, 0xd5bf541596c391a2ULL, + 0xf5cb9bef8e9c1618ULL, 0x46284e9dbc685d11ULL, 0x2074cffa185f87baULL, + 0xbd3ee2b6b8fcedd1ULL, 0xae64e3f1f23607b0ULL, 0xfeb68965ce29d984ULL, + 0x55724fdaf6a2b770ULL, 0x29496d5cd753720eULL, 0xa75941573d3af204ULL, + 0x8e102c0bea69800aULL, 0x111ab16bc573d049ULL, 0xd7ffe439197aab8aULL, + 0xefac380e0b5a09cdULL, 0x48f579593660fbc9ULL, 0x22347fd697e6bd92ULL, + 0x61bc1405e13389c7ULL, 0x4ab5c975b9d9c1e1ULL, 0x80cd1bcf606126d2ULL, + 0x7186fd78ed92449aULL, 0x93971a882aabccb3ULL, 0x88d0e17f66bfce72ULL, + 0x27945a985d5bd4d6ULL + }, { + 0xde553f8c05a811c8ULL, 0x1906b59631b4f565ULL, 0x436e70d6b1964ff7ULL, + 0x36d343cb8b1e9d85ULL, 0x843dfacc858aab5aULL, 0xfdfc95c299bfc7f9ULL, + 0x0f634bdea1d51fa2ULL, 
0x6d458b3b76efb3cdULL, 0x85c3f77cf8593f80ULL, + 0x3c91315fbe737cb2ULL, 0x2148b03366ace398ULL, 0x18f8b8264c6761bfULL, + 0xc830c1c495c9fb0fULL, 0x981a76102086a0aaULL, 0xaa16012142f35760ULL, + 0x35cc54060c763cf6ULL, 0x42907d66cc45db2dULL, 0x8203d44b965af4bcULL, + 0x3d6f3cefc3a0e868ULL, 0xbc73ff69d292bda7ULL, 0x8722ed0102e20a29ULL, + 0x8f8185e8cd34deb7ULL, 0x9b0561dda7ee01d9ULL, 0x5335a0193227fad6ULL, + 0xc9cecc74e81a6fd5ULL, 0x54f5832e5c2431eaULL, 0x99e47ba05d553470ULL, + 0xf7bee756acd226ceULL, 0x384e05a5571816fdULL, 0xd1367452a47d0e6aULL, + 0xf29fde1c386ad85bULL, 0x320c77316275f7caULL, 0xd0c879e2d9ae9ab0ULL, + 0xdb7406c69110ef5dULL, 0x45505e51a2461011ULL, 0xfc029872e46c5323ULL, + 0xfa3cb6f5f7bc0cc5ULL, 0x031f17cd8768a173ULL, 0xbd8df2d9af41297dULL, + 0x9d3b4f5ab43e5e3fULL, 0x4071671b36feee84ULL, 0x716207e7d3e3b83dULL, + 0x48d20ff2f9283a1aULL, 0x27769eb4757cbc7eULL, 0x5c56ebc793f2e574ULL, + 0xa48b474f9ef5dc18ULL, 0x52cbada94ff46e0cULL, 0x60c7da982d8199c6ULL, + 0x0e9d466edc068b78ULL, 0x4eec2175eaf865fcULL, 0x550b8e9e21f7a530ULL, + 0x6b7ba5bc653fec2bULL, 0x5eb7f1ba6949d0ddULL, 0x57ea94e3db4c9099ULL, + 0xf640eae6d101b214ULL, 0xdd4a284182c0b0bbULL, 0xff1d8fbf6304f250ULL, + 0xb8accb933bf9d7e8ULL, 0xe8867c478eb68c4dULL, 0x3f8e2692391bddc1ULL, + 0xcb2fd60912a15a7cULL, 0xaec935dbab983d2fULL, 0xf55ffd2b56691367ULL, + 0x80e2ce366ce1c115ULL, 0x179bf3f8edb27e1dULL, 0x01fe0db07dd394daULL, + 0xda8a0b76ecc37b87ULL, 0x44ae53e1df9584cbULL, 0xb310b4b77347a205ULL, + 0xdfab323c787b8512ULL, 0x3b511268d070b78eULL, 0x65e6e3d2b9396753ULL, + 0x6864b271e2574d58ULL, 0x259784c98fc789d7ULL, 0x02e11a7dfabb35a9ULL, + 0x8841a6dfa337158bULL, 0x7ade78c39b5dcdd0ULL, 0xb7cf804d9a2cc84aULL, + 0x20b6bd831b7f7742ULL, 0x75bd331d3a88d272ULL, 0x418f6aab4b2d7a5eULL, + 0xd9951cbb6babdaf4ULL, 0xb6318dfde7ff5c90ULL, 0x1f389b112264aa83ULL, + 0x492c024284fbaec0ULL, 0xe33a0363c608f9a0ULL, 0x2688930408af28a4ULL, + 0xc7538a1a341ce4adULL, 0x5da8e677ee2171aeULL, 0x8c9e92254a5c7fc4ULL, + 0x63d8cd55aae938b5ULL, 
0x29ebd8daa97a3706ULL, 0x959827b37be88aa1ULL, + 0x1484e4356adadf6eULL, 0xa7945082199d7d6bULL, 0xbf6ce8a455fa1cd4ULL, + 0x9cc542eac9edcae5ULL, 0x79c16f0e1c356ca3ULL, 0x89bfab6fdee48151ULL, + 0xd4174d1830c5f0ffULL, 0x9258048415eb419dULL, 0x6139d72850520d1cULL, + 0x6a85a80c18ec78f1ULL, 0xcd11f88e0171059aULL, 0xcceff53e7ca29140ULL, + 0xd229639f2315af19ULL, 0x90b91ef9ef507434ULL, 0x5977d28d074a1be1ULL, + 0x311360fce51d56b9ULL, 0xc093a92d5a1f2f91ULL, 0x1a19a25bb6dc5416ULL, + 0xeb996b8a09de2d3eULL, 0xfee3820f1ed7668aULL, 0xd7085ad5b7ad518cULL, + 0x7fff41890fe53345ULL, 0xec5948bd67dde602ULL, 0x2fd5f65dbaaa68e0ULL, + 0xa5754affe32648c2ULL, 0xf8ddac880d07396cULL, 0x6fa491468c548664ULL, + 0x0c7c5c1326bdbed1ULL, 0x4a33158f03930fb3ULL, 0x699abfc19f84d982ULL, + 0xe4fa2054a80b329cULL, 0x6707f9af438252faULL, 0x08a368e9cfd6d49eULL, + 0x47b1442c58fd25b8ULL, 0xbbb3dc5ebc91769bULL, 0x1665fe489061eac7ULL, + 0x33f27a811fa66310ULL, 0x93a609346838d547ULL, 0x30ed6d4c98cec263ULL, + 0x1dd9816cd8df9f2aULL, 0x94662a03063b1e7bULL, 0x83fdd9fbeb896066ULL, + 0x7b207573e68e590aULL, 0x5f49fc0a149a4407ULL, 0x343259b671a5a82cULL, + 0xfbc2bb458a6f981fULL, 0xc272b350a0a41a38ULL, 0x3aaf1fd8ada32354ULL, + 0x6cbb868b0b3c2717ULL, 0xa2b569c88d2583feULL, 0xf180c9d1bf027928ULL, + 0xaf37386bd64ba9f5ULL, 0x12bacab2790a8088ULL, 0x4c0d3b0810435055ULL, + 0xb2eeb9070e9436dfULL, 0xc5b29067cea7d104ULL, 0xdcb425f1ff132461ULL, + 0x4f122cc5972bf126ULL, 0xac282fa651230886ULL, 0xe7e537992f6393efULL, + 0xe61b3a2952b00735ULL, 0x709c0a57ae302ce7ULL, 0xe02514ae416058d3ULL, + 0xc44c9dd7b37445deULL, 0x5a68c5408022ba92ULL, 0x1c278cdca50c0bf0ULL, + 0x6e5a9cf6f18712beULL, 0x86dce0b17f319ef3ULL, 0x2d34ec2040115d49ULL, + 0x4bcd183f7e409b69ULL, 0x2815d56ad4a9a3dcULL, 0x24698979f2141d0dULL, + 0x0000000000000000ULL, 0x1ec696a15fb73e59ULL, 0xd86b110b16784e2eULL, + 0x8e7f8858b0e74a6dULL, 0x063e2e8713d05fe6ULL, 0xe2c40ed3bbdb6d7aULL, + 0xb1f1aeca89fc97acULL, 0xe1db191e3cb3cc09ULL, 0x6418ee62c4eaf389ULL, + 0xc6ad87aa49cf7077ULL, 
0xd6f65765ca7ec556ULL, 0x9afb6c6dda3d9503ULL, + 0x7ce05644888d9236ULL, 0x8d609f95378feb1eULL, 0x23a9aa4e9c17d631ULL, + 0x6226c0e5d73aac6fULL, 0x56149953a69f0443ULL, 0xeeb852c09d66d3abULL, + 0x2b0ac2a753c102afULL, 0x07c023376e03cb3cULL, 0x2ccae1903dc2c993ULL, + 0xd3d76e2f5ec63bc3ULL, 0x9e2458973356ff4cULL, 0xa66a5d32644ee9b1ULL, + 0x0a427294356de137ULL, 0x783f62be61e6f879ULL, 0x1344c70204d91452ULL, + 0x5b96c8f0fdf12e48ULL, 0xa90916ecc59bf613ULL, 0xbe92e5142829880eULL, + 0x727d102a548b194eULL, 0x1be7afebcb0fc0ccULL, 0x3e702b2244c8491bULL, + 0xd5e940a84d166425ULL, 0x66f9f41f3e51c620ULL, 0xabe80c913f20c3baULL, + 0xf07ec461c2d1edf2ULL, 0xf361d3ac45b94c81ULL, 0x0521394a94b8fe95ULL, + 0xadd622162cf09c5cULL, 0xe97871f7f3651897ULL, 0xf4a1f09b2bba87bdULL, + 0x095d6559b2054044ULL, 0x0bbc7f2448be75edULL, 0x2af4cf172e129675ULL, + 0x157ae98517094bb4ULL, 0x9fda55274e856b96ULL, 0x914713499283e0eeULL, + 0xb952c623462a4332ULL, 0x74433ead475b46a8ULL, 0x8b5eb112245fb4f8ULL, + 0xa34b6478f0f61724ULL, 0x11a5dd7ffe6221fbULL, 0xc16da49d27ccbb4bULL, + 0x76a224d0bde07301ULL, 0x8aa0bca2598c2022ULL, 0x4df336b86d90c48fULL, + 0xea67663a740db9e4ULL, 0xef465f70e0b54771ULL, 0x39b008152acb8227ULL, + 0x7d1e5bf4f55e06ecULL, 0x105bd0cf83b1b521ULL, 0x775c2960c033e7dbULL, + 0x7e014c397236a79fULL, 0x811cc386113255cfULL, 0xeda7450d1a0e72d8ULL, + 0x5889df3d7a998f3bULL, 0x2e2bfbedc779fc3aULL, 0xce0eef438619a4e9ULL, + 0x372d4e7bf6cd095fULL, 0x04df34fae96b6a4fULL, 0xf923a13870d4adb6ULL, + 0xa1aa7e050a4d228dULL, 0xa8f71b5cb84862c9ULL, 0xb52e9a306097fde3ULL, + 0x0d8251a35b6e2a0bULL, 0x2257a7fee1c442ebULL, 0x73831d9a29588d94ULL, + 0x51d4ba64c89ccf7fULL, 0x502ab7d4b54f5ba5ULL, 0x97793dce8153bf08ULL, + 0xe5042de4d5d8a646ULL, 0x9687307efc802bd2ULL, 0xa05473b5779eb657ULL, + 0xb4d097801d446939ULL, 0xcff0e2f3fbca3033ULL, 0xc38cbee0dd778ee2ULL, + 0x464f499c252eb162ULL, 0xcad1dbb96f72cea6ULL, 0xba4dd1eec142e241ULL, + 0xb00fa37af42f0376ULL + }, { + 0xcce4cd3aa968b245ULL, 0x089d5484e80b7fafULL, 0x638246c1b3548304ULL, + 
0xd2fe0ec8c2355492ULL, 0xa7fbdf7ff2374eeeULL, 0x4df1600c92337a16ULL, + 0x84e503ea523b12fbULL, 0x0790bbfd53ab0c4aULL, 0x198a780f38f6ea9dULL, + 0x2ab30c8f55ec48cbULL, 0xe0f7fed6b2c49db5ULL, 0xb6ecf3f422cadbdcULL, + 0x409c9a541358df11ULL, 0xd3ce8a56dfde3fe3ULL, 0xc3e9224312c8c1a0ULL, + 0x0d6dfa58816ba507ULL, 0xddf3e1b179952777ULL, 0x04c02a42748bb1d9ULL, + 0x94c2abff9f2decb8ULL, 0x4f91752da8f8acf4ULL, 0x78682befb169bf7bULL, + 0xe1c77a48af2ff6c4ULL, 0x0c5d7ec69c80ce76ULL, 0x4cc1e4928fd81167ULL, + 0xfeed3d24d9997b62ULL, 0x518bb6dfc3a54a23ULL, 0x6dbf2d26151f9b90ULL, + 0xb5bc624b05ea664fULL, 0xe86aaa525acfe21aULL, 0x4801ced0fb53a0beULL, + 0xc91463e6c00868edULL, 0x1027a815cd16fe43ULL, 0xf67069a0319204cdULL, + 0xb04ccc976c8abce7ULL, 0xc0b9b3fc35e87c33ULL, 0xf380c77c58f2de65ULL, + 0x50bb3241de4e2152ULL, 0xdf93f490435ef195ULL, 0xf1e0d25d62390887ULL, + 0xaf668bfb1a3c3141ULL, 0xbc11b251f00a7291ULL, 0x73a5eed47e427d47ULL, + 0x25bee3f6ee4c3b2eULL, 0x43cc0beb34786282ULL, 0xc824e778dde3039cULL, + 0xf97d86d98a327728ULL, 0xf2b043e24519b514ULL, 0xe297ebf7880f4b57ULL, + 0x3a94a49a98fab688ULL, 0x868516cb68f0c419ULL, 0xeffa11af0964ee50ULL, + 0xa4ab4ec0d517f37dULL, 0xa9c6b498547c567aULL, 0x8e18424f80fbbbb6ULL, + 0x0bcdc53bcf2bc23cULL, 0x137739aaea3643d0ULL, 0x2c1333ec1bac2ff0ULL, + 0x8d48d3f0a7db0625ULL, 0x1e1ac3f26b5de6d7ULL, 0xf520f81f16b2b95eULL, + 0x9f0f6ec450062e84ULL, 0x0130849e1deb6b71ULL, 0xd45e31ab8c7533a9ULL, + 0x652279a2fd14e43fULL, 0x3209f01e70f1c927ULL, 0xbe71a770cac1a473ULL, + 0x0e3d6be7a64b1894ULL, 0x7ec8148cff29d840ULL, 0xcb7476c7fac3be0fULL, + 0x72956a4a63a91636ULL, 0x37f95ec21991138fULL, 0x9e3fea5a4ded45f5ULL, + 0x7b38ba50964902e8ULL, 0x222e580bbde73764ULL, 0x61e253e0899f55e6ULL, + 0xfc8d2805e352ad80ULL, 0x35994be3235ac56dULL, 0x09add01af5e014deULL, + 0x5e8659a6780539c6ULL, 0xb17c48097161d796ULL, 0x026015213acbd6e2ULL, + 0xd1ae9f77e515e901ULL, 0xb7dc776a3f21b0adULL, 0xaba6a1b96eb78098ULL, + 0x9bcf4486248d9f5dULL, 0x582666c536455efdULL, 0xfdbdac9bfeb9c6f1ULL, + 
0xc47999be4163cdeaULL, 0x765540081722a7efULL, 0x3e548ed8ec710751ULL, + 0x3d041f67cb51bac2ULL, 0x7958af71ac82d40aULL, 0x36c9da5c047a78feULL, + 0xed9a048e33af38b2ULL, 0x26ee7249c96c86bdULL, 0x900281bdeba65d61ULL, + 0x11172c8bd0fd9532ULL, 0xea0abf73600434f8ULL, 0x42fc8f75299309f3ULL, + 0x34a9cf7d3eb1ae1cULL, 0x2b838811480723baULL, 0x5ce64c8742ceef24ULL, + 0x1adae9b01fd6570eULL, 0x3c349bf9d6bad1b3ULL, 0x82453c891c7b75c0ULL, + 0x97923a40b80d512bULL, 0x4a61dbf1c198765cULL, 0xb48ce6d518010d3eULL, + 0xcfb45c858e480fd6ULL, 0xd933cbf30d1e96aeULL, 0xd70ea014ab558e3aULL, + 0xc189376228031742ULL, 0x9262949cd16d8b83ULL, 0xeb3a3bed7def5f89ULL, + 0x49314a4ee6b8cbcfULL, 0xdcc3652f647e4c06ULL, 0xda635a4c2a3e2b3dULL, + 0x470c21a940f3d35bULL, 0x315961a157d174b4ULL, 0x6672e81dda3459acULL, + 0x5b76f77a1165e36eULL, 0x445cb01667d36ec8ULL, 0xc5491d205c88a69bULL, + 0x456c34887a3805b9ULL, 0xffddb9bac4721013ULL, 0x99af51a71e4649bfULL, + 0xa15be01cbc7729d5ULL, 0x52db2760e485f7b0ULL, 0x8c78576eba306d54ULL, + 0xae560f6507d75a30ULL, 0x95f22f6182c687c9ULL, 0x71c5fbf54489aba5ULL, + 0xca44f259e728d57eULL, 0x88b87d2ccebbdc8dULL, 0xbab18d32be4a15aaULL, + 0x8be8ec93e99b611eULL, 0x17b713e89ebdf209ULL, 0xb31c5d284baa0174ULL, + 0xeeca9531148f8521ULL, 0xb8d198138481c348ULL, 0x8988f9b2d350b7fcULL, + 0xb9e11c8d996aa839ULL, 0x5a4673e40c8e881fULL, 0x1687977683569978ULL, + 0xbf4123eed72acf02ULL, 0x4ea1f1b3b513c785ULL, 0xe767452be16f91ffULL, + 0x7505d1b730021a7cULL, 0xa59bca5ec8fc980cULL, 0xad069eda20f7e7a3ULL, + 0x38f4b1bba231606aULL, 0x60d2d77e94743e97ULL, 0x9affc0183966f42cULL, + 0x248e6768f3a7505fULL, 0xcdd449a4b483d934ULL, 0x87b59255751baf68ULL, + 0x1bea6d2e023d3c7fULL, 0x6b1f12455b5ffcabULL, 0x743555292de9710dULL, + 0xd8034f6d10f5fddfULL, 0xc6198c9f7ba81b08ULL, 0xbb8109aca3a17edbULL, + 0xfa2d1766ad12cabbULL, 0xc729080166437079ULL, 0x9c5fff7b77269317ULL, + 0x0000000000000000ULL, 0x15d706c9a47624ebULL, 0x6fdf38072fd44d72ULL, + 0x5fb6dd3865ee52b7ULL, 0xa33bf53d86bcff37ULL, 0xe657c1b5fc84fa8eULL, + 
0xaa962527735cebe9ULL, 0x39c43525bfda0b1bULL, 0x204e4d2a872ce186ULL, + 0x7a083ece8ba26999ULL, 0x554b9c9db72efbfaULL, 0xb22cd9b656416a05ULL, + 0x96a2bedea5e63a5aULL, 0x802529a826b0a322ULL, 0x8115ad363b5bc853ULL, + 0x8375b81701901eb1ULL, 0x3069e53f4a3a1fc5ULL, 0xbd2136cfede119e0ULL, + 0x18bafc91251d81ecULL, 0x1d4a524d4c7d5b44ULL, 0x05f0aedc6960daa8ULL, + 0x29e39d3072ccf558ULL, 0x70f57f6b5962c0d4ULL, 0x989fd53903ad22ceULL, + 0xf84d024797d91c59ULL, 0x547b1803aac5908bULL, 0xf0d056c37fd263f6ULL, + 0xd56eb535919e58d8ULL, 0x1c7ad6d351963035ULL, 0x2e7326cd2167f912ULL, + 0xac361a443d1c8cd2ULL, 0x697f076461942a49ULL, 0x4b515f6fdc731d2dULL, + 0x8ad8680df4700a6fULL, 0x41ac1eca0eb3b460ULL, 0x7d988533d80965d3ULL, + 0xa8f6300649973d0bULL, 0x7765c4960ac9cc9eULL, 0x7ca801adc5e20ea2ULL, + 0xdea3700e5eb59ae4ULL, 0xa06b6482a19c42a4ULL, 0x6a2f96db46b497daULL, + 0x27def6d7d487edccULL, 0x463ca5375d18b82aULL, 0xa6cb5be1efdc259fULL, + 0x53eba3fef96e9cc1ULL, 0xce84d81b93a364a7ULL, 0xf4107c810b59d22fULL, + 0x333974806d1aa256ULL, 0x0f0def79bba073e5ULL, 0x231edc95a00c5c15ULL, + 0xe437d494c64f2c6cULL, 0x91320523f64d3610ULL, 0x67426c83c7df32ddULL, + 0x6eefbc99323f2603ULL, 0x9d6f7be56acdf866ULL, 0x5916e25b2bae358cULL, + 0x7ff89012e2c2b331ULL, 0x035091bf2720bd93ULL, 0x561b0d22900e4669ULL, + 0x28d319ae6f279e29ULL, 0x2f43a2533c8c9263ULL, 0xd09e1be9f8fe8270ULL, + 0xf740ed3e2c796fbcULL, 0xdb53ded237d5404cULL, 0x62b2c25faebfe875ULL, + 0x0afd41a5d2c0a94dULL, 0x6412fd3ce0ff8f4eULL, 0xe3a76f6995e42026ULL, + 0x6c8fa9b808f4f0e1ULL, 0xc2d9a6dd0f23aad1ULL, 0x8f28c6d19d10d0c7ULL, + 0x85d587744fd0798aULL, 0xa20b71a39b579446ULL, 0x684f83fa7c7f4138ULL, + 0xe507500adba4471dULL, 0x3f640a46f19a6c20ULL, 0x1247bd34f7dd28a1ULL, + 0x2d23b77206474481ULL, 0x93521002cc86e0f2ULL, 0x572b89bc8de52d18ULL, + 0xfb1d93f8b0f9a1caULL, 0xe95a2ecc4724896bULL, 0x3ba420048511ddf9ULL, + 0xd63e248ab6bee54bULL, 0x5dd6c8195f258455ULL, 0x06a03f634e40673bULL, + 0x1f2a476c76b68da6ULL, 0x217ec9b49ac78af7ULL, 0xecaa80102e4453c3ULL, + 
0x14e78257b99d4f9aULL + }, { + 0x20329b2cc87bba05ULL, 0x4f5eb6f86546a531ULL, 0xd4f44775f751b6b1ULL, + 0x8266a47b850dfa8bULL, 0xbb986aa15a6ca985ULL, 0xc979eb08f9ae0f99ULL, + 0x2da6f447a2375ea1ULL, 0x1e74275dcd7d8576ULL, 0xbc20180a800bc5f8ULL, + 0xb4a2f701b2dc65beULL, 0xe726946f981b6d66ULL, 0x48e6c453bf21c94cULL, + 0x42cad9930f0a4195ULL, 0xefa47b64aacccd20ULL, 0x71180a8960409a42ULL, + 0x8bb3329bf6a44e0cULL, 0xd34c35de2d36daccULL, 0xa92f5b7cbc23dc96ULL, + 0xb31a85aa68bb09c3ULL, 0x13e04836a73161d2ULL, 0xb24dfc4129c51d02ULL, + 0x8ae44b70b7da5acdULL, 0xe671ed84d96579a7ULL, 0xa4bb3417d66f3832ULL, + 0x4572ab38d56d2de8ULL, 0xb1b47761ea47215cULL, 0xe81c09cf70aba15dULL, + 0xffbdb872ce7f90acULL, 0xa8782297fd5dc857ULL, 0x0d946f6b6a4ce4a4ULL, + 0xe4df1f4f5b995138ULL, 0x9ebc71edca8c5762ULL, 0x0a2c1dc0b02b88d9ULL, + 0x3b503c115d9d7b91ULL, 0xc64376a8111ec3a2ULL, 0xcec199a323c963e4ULL, + 0xdc76a87ec58616f7ULL, 0x09d596e073a9b487ULL, 0x14583a9d7d560dafULL, + 0xf4c6dc593f2a0cb4ULL, 0xdd21d19584f80236ULL, 0x4a4836983ddde1d3ULL, + 0xe58866a41ae745f9ULL, 0xf591a5b27e541875ULL, 0x891dc05074586693ULL, + 0x5b068c651810a89eULL, 0xa30346bc0c08544fULL, 0x3dbf3751c684032dULL, + 0x2a1e86ec785032dcULL, 0xf73f5779fca830eaULL, 0xb60c05ca30204d21ULL, + 0x0cc316802b32f065ULL, 0x8770241bdd96be69ULL, 0xb861e18199ee95dbULL, + 0xf805cad91418fcd1ULL, 0x29e70dccbbd20e82ULL, 0xc7140f435060d763ULL, + 0x0f3a9da0e8b0cc3bULL, 0xa2543f574d76408eULL, 0xbd7761e1c175d139ULL, + 0x4b1f4f737ca3f512ULL, 0x6dc2df1f2fc137abULL, 0xf1d05c3967b14856ULL, + 0xa742bf3715ed046cULL, 0x654030141d1697edULL, 0x07b872abda676c7dULL, + 0x3ce84eba87fa17ecULL, 0xc1fb0403cb79afdfULL, 0x3e46bc7105063f73ULL, + 0x278ae987121cd678ULL, 0xa1adb4778ef47cd0ULL, 0x26dd906c5362c2b9ULL, + 0x05168060589b44e2ULL, 0xfbfc41f9d79ac08fULL, 0x0e6de44ba9ced8faULL, + 0x9feb08068bf243a3ULL, 0x7b341749d06b129bULL, 0x229c69e74a87929aULL, + 0xe09ee6c4427c011bULL, 0x5692e30e725c4c3aULL, 0xda99a33e5e9f6e4bULL, + 0x353dd85af453a36bULL, 0x25241b4c90e0fee7ULL, 
0x5de987258309d022ULL, + 0xe230140fc0802984ULL, 0x93281e86a0c0b3c6ULL, 0xf229d719a4337408ULL, + 0x6f6c2dd4ad3d1f34ULL, 0x8ea5b2fbae3f0aeeULL, 0x8331dd90c473ee4aULL, + 0x346aa1b1b52db7aaULL, 0xdf8f235e06042aa9ULL, 0xcc6f6b68a1354b7bULL, + 0x6c95a6f46ebf236aULL, 0x52d31a856bb91c19ULL, 0x1a35ded6d498d555ULL, + 0xf37eaef2e54d60c9ULL, 0x72e181a9a3c2a61cULL, 0x98537aad51952fdeULL, + 0x16f6c856ffaa2530ULL, 0xd960281e9d1d5215ULL, 0x3a0745fa1ce36f50ULL, + 0x0b7b642bf1559c18ULL, 0x59a87eae9aec8001ULL, 0x5e100c05408bec7cULL, + 0x0441f98b19e55023ULL, 0xd70dcc5534d38aefULL, 0x927f676de1bea707ULL, + 0x9769e70db925e3e5ULL, 0x7a636ea29115065aULL, 0x468b201816ef11b6ULL, + 0xab81a9b73edff409ULL, 0xc0ac7de88a07bb1eULL, 0x1f235eb68c0391b7ULL, + 0x6056b074458dd30fULL, 0xbe8eeac102f7ed67ULL, 0xcd381283e04b5fbaULL, + 0x5cbefecec277c4e3ULL, 0xd21b4c356c48ce0dULL, 0x1019c31664b35d8cULL, + 0x247362a7d19eea26ULL, 0xebe582efb3299d03ULL, 0x02aef2cb82fc289fULL, + 0x86275df09ce8aaa8ULL, 0x28b07427faac1a43ULL, 0x38a9b7319e1f47cfULL, + 0xc82e92e3b8d01b58ULL, 0x06ef0b409b1978bcULL, 0x62f842bfc771fb90ULL, + 0x9904034610eb3b1fULL, 0xded85ab5477a3e68ULL, 0x90d195a663428f98ULL, + 0x5384636e2ac708d8ULL, 0xcbd719c37b522706ULL, 0xae9729d76644b0ebULL, + 0x7c8c65e20a0c7ee6ULL, 0x80c856b007f1d214ULL, 0x8c0b40302cc32271ULL, + 0xdbcedad51fe17a8aULL, 0x740e8ae938dbdea0ULL, 0xa615c6dc549310adULL, + 0x19cc55f6171ae90bULL, 0x49b1bdb8fe5fdd8dULL, 0xed0a89af2830e5bfULL, + 0x6a7aadb4f5a65bd6ULL, 0x7e22972988f05679ULL, 0xf952b3325566e810ULL, + 0x39fecedadf61530eULL, 0x6101c99f04f3c7ceULL, 0x2e5f7f6761b562ffULL, + 0xf08725d226cf5c97ULL, 0x63af3b54860fef51ULL, 0x8ff2cb10ef411e2fULL, + 0x884ab9bb35267252ULL, 0x4df04433e7ba8daeULL, 0x9afd8866d3690741ULL, + 0x66b9bb34de94abb3ULL, 0x9baaf18d92171380ULL, 0x543c11c5f0a064a5ULL, + 0x17a1b1bdbed431f1ULL, 0xb5f58eeaf3a2717fULL, 0xc355f6c849858740ULL, + 0xec5df044694ef17eULL, 0xd83751f5dc6346d4ULL, 0xfc4433520dfdacf2ULL, + 0x0000000000000000ULL, 0x5a51f58e596ebc5fULL, 
0x3285aaf12e34cf16ULL, + 0x8d5c39db6dbd36b0ULL, 0x12b731dde64f7513ULL, 0x94906c2d7aa7dfbbULL, + 0x302b583aacc8e789ULL, 0x9d45facd090e6b3cULL, 0x2165e2c78905aec4ULL, + 0x68d45f7f775a7349ULL, 0x189b2c1d5664fdcaULL, 0xe1c99f2f030215daULL, + 0x6983269436246788ULL, 0x8489af3b1e148237ULL, 0xe94b702431d5b59cULL, + 0x33d2d31a6f4adbd7ULL, 0xbfd9932a4389f9a6ULL, 0xb0e30e8aab39359dULL, + 0xd1e2c715afcaf253ULL, 0x150f43763c28196eULL, 0xc4ed846393e2eb3dULL, + 0x03f98b20c3823c5eULL, 0xfd134ab94c83b833ULL, 0x556b682eb1de7064ULL, + 0x36c4537a37d19f35ULL, 0x7559f30279a5ca61ULL, 0x799ae58252973a04ULL, + 0x9c12832648707ffdULL, 0x78cd9c6913e92ec5ULL, 0x1d8dac7d0effb928ULL, + 0x439da0784e745554ULL, 0x413352b3cc887dcbULL, 0xbacf134a1b12bd44ULL, + 0x114ebafd25cd494dULL, 0x2f08068c20cb763eULL, 0x76a07822ba27f63fULL, + 0xeab2fb04f25789c2ULL, 0xe3676de481fe3d45ULL, 0x1b62a73d95e6c194ULL, + 0x641749ff5c68832cULL, 0xa5ec4dfc97112cf3ULL, 0xf6682e92bdd6242bULL, + 0x3f11c59a44782bb2ULL, 0x317c21d1edb6f348ULL, 0xd65ab5be75ad9e2eULL, + 0x6b2dd45fb4d84f17ULL, 0xfaab381296e4d44eULL, 0xd0b5befeeeb4e692ULL, + 0x0882ef0b32d7a046ULL, 0x512a91a5a83b2047ULL, 0x963e9ee6f85bf724ULL, + 0x4e09cf132438b1f0ULL, 0x77f701c9fb59e2feULL, 0x7ddb1c094b726a27ULL, + 0x5f4775ee01f5f8bdULL, 0x9186ec4d223c9b59ULL, 0xfeeac1998f01846dULL, + 0xac39db1ce4b89874ULL, 0xb75b7c21715e59e0ULL, 0xafc0503c273aa42aULL, + 0x6e3b543fec430bf5ULL, 0x704f7362213e8e83ULL, 0x58ff0745db9294c0ULL, + 0x67eec2df9feabf72ULL, 0xa0facd9ccf8a6811ULL, 0xb936986ad890811aULL, + 0x95c715c63bd9cb7aULL, 0xca8060283a2c33c7ULL, 0x507de84ee9453486ULL, + 0x85ded6d05f6a96f6ULL, 0x1cdad5964f81ade9ULL, 0xd5a33e9eb62fa270ULL, + 0x40642b588df6690aULL, 0x7f75eec2c98e42b8ULL, 0x2cf18dace3494a60ULL, + 0x23cb100c0bf9865bULL, 0xeef3028febb2d9e1ULL, 0x4425d2d394133929ULL, + 0xaad6d05c7fa1e0c8ULL, 0xad6ea2f7a5c68cb5ULL, 0xc2028f2308fb9381ULL, + 0x819f2f5b468fc6d5ULL, 0xc5bafd88d29cfffcULL, 0x47dc59f357910577ULL, + 0x2b49ff07392e261dULL, 0x57c59ae5332258fbULL, 
0x73b6f842e2bcb2ddULL, + 0xcf96e04862b77725ULL, 0x4ca73dd8a6c4996fULL, 0x015779eb417e14c1ULL, + 0x37932a9176af8bf4ULL + }, { + 0x190a2c9b249df23eULL, 0x2f62f8b62263e1e9ULL, 0x7a7f754740993655ULL, + 0x330b7ba4d5564d9fULL, 0x4c17a16a46672582ULL, 0xb22f08eb7d05f5b8ULL, + 0x535f47f40bc148ccULL, 0x3aec5d27d4883037ULL, 0x10ed0a1825438f96ULL, + 0x516101f72c233d17ULL, 0x13cc6f949fd04eaeULL, 0x739853c441474bfdULL, + 0x653793d90d3f5b1bULL, 0x5240647b96b0fc2fULL, 0x0c84890ad27623e0ULL, + 0xd7189b32703aaea3ULL, 0x2685de3523bd9c41ULL, 0x99317c5b11bffefaULL, + 0x0d9baa854f079703ULL, 0x70b93648fbd48ac5ULL, 0xa80441fce30bc6beULL, + 0x7287704bdc36ff1eULL, 0xb65384ed33dc1f13ULL, 0xd36417343ee34408ULL, + 0x39cd38ab6e1bf10fULL, 0x5ab861770a1f3564ULL, 0x0ebacf09f594563bULL, + 0xd04572b884708530ULL, 0x3cae9722bdb3af47ULL, 0x4a556b6f2f5cbaf2ULL, + 0xe1704f1f76c4bd74ULL, 0x5ec4ed7144c6dfcfULL, 0x16afc01d4c7810e6ULL, + 0x283f113cd629ca7aULL, 0xaf59a8761741ed2dULL, 0xeed5a3991e215facULL, + 0x3bf37ea849f984d4ULL, 0xe413e096a56ce33cULL, 0x2c439d3a98f020d1ULL, + 0x637559dc6404c46bULL, 0x9e6c95d1e5f5d569ULL, 0x24bb9836045fe99aULL, + 0x44efa466dac8ecc9ULL, 0xc6eab2a5c80895d6ULL, 0x803b50c035220cc4ULL, + 0x0321658cba93c138ULL, 0x8f9ebc465dc7ee1cULL, 0xd15a5137190131d3ULL, + 0x0fa5ec8668e5e2d8ULL, 0x91c979578d1037b1ULL, 0x0642ca05693b9f70ULL, + 0xefca80168350eb4fULL, 0x38d21b24f36a45ecULL, 0xbeab81e1af73d658ULL, + 0x8cbfd9cae7542f24ULL, 0xfd19cc0d81f11102ULL, 0x0ac6430fbb4dbc90ULL, + 0x1d76a09d6a441895ULL, 0x2a01573ff1cbbfa1ULL, 0xb572e161894fde2bULL, + 0x8124734fa853b827ULL, 0x614b1fdf43e6b1b0ULL, 0x68ac395c4238cc18ULL, + 0x21d837bfd7f7b7d2ULL, 0x20c714304a860331ULL, 0x5cfaab726324aa14ULL, + 0x74c5ba4eb50d606eULL, 0xf3a3030474654739ULL, 0x23e671bcf015c209ULL, + 0x45f087e947b9582aULL, 0xd8bd77b418df4c7bULL, 0xe06f6c90ebb50997ULL, + 0x0bd96080263c0873ULL, 0x7e03f9410e40dcfeULL, 0xb8e94be4c6484928ULL, + 0xfb5b0608e8ca8e72ULL, 0x1a2b49179e0e3306ULL, 0x4e29e76961855059ULL, + 0x4f36c4e6fcf4e4baULL, 
0x49740ee395cf7bcaULL, 0xc2963ea386d17f7dULL, + 0x90d65ad810618352ULL, 0x12d34c1b02a1fa4dULL, 0xfa44258775bb3a91ULL, + 0x18150f14b9ec46ddULL, 0x1491861e6b9a653dULL, 0x9a1019d7ab2c3fc2ULL, + 0x3668d42d06fe13d7ULL, 0xdcc1fbb25606a6d0ULL, 0x969490dd795a1c22ULL, + 0x3549b1a1bc6dd2efULL, 0xc94f5e23a0ed770eULL, 0xb9f6686b5b39fdcbULL, + 0xc4d4f4a6efeae00dULL, 0xe732851a1fff2204ULL, 0x94aad6de5eb869f9ULL, + 0x3f8ff2ae07206e7fULL, 0xfe38a9813b62d03aULL, 0xa7a1ad7a8bee2466ULL, + 0x7b6056c8dde882b6ULL, 0x302a1e286fc58ca7ULL, 0x8da0fa457a259bc7ULL, + 0xb3302b64e074415bULL, 0x5402ae7eff8b635fULL, 0x08f8050c9cafc94bULL, + 0xae468bf98a3059ceULL, 0x88c355cca98dc58fULL, 0xb10e6d67c7963480ULL, + 0xbad70de7e1aa3cf3ULL, 0xbfb4a26e320262bbULL, 0xcb711820870f02d5ULL, + 0xce12b7a954a75c9dULL, 0x563ce87dd8691684ULL, 0x9f73b65e7884618aULL, + 0x2b1e74b06cba0b42ULL, 0x47cec1ea605b2df1ULL, 0x1c698312f735ac76ULL, + 0x5fdbcefed9b76b2cULL, 0x831a354c8fb1cdfcULL, 0x820516c312c0791fULL, + 0xb74ca762aeadabf0ULL, 0xfc06ef821c80a5e1ULL, 0x5723cbf24518a267ULL, + 0x9d4df05d5f661451ULL, 0x588627742dfd40bfULL, 0xda8331b73f3d39a0ULL, + 0x17b0e392d109a405ULL, 0xf965400bcf28fba9ULL, 0x7c3dbf4229a2a925ULL, + 0x023e460327e275dbULL, 0x6cd0b55a0ce126b3ULL, 0xe62da695828e96e7ULL, + 0x42ad6e63b3f373b9ULL, 0xe50cc319381d57dfULL, 0xc5cbd729729b54eeULL, + 0x46d1e265fd2a9912ULL, 0x6428b056904eeff8ULL, 0x8be23040131e04b7ULL, + 0x6709d5da2add2ec0ULL, 0x075de98af44a2b93ULL, 0x8447dcc67bfbe66fULL, + 0x6616f655b7ac9a23ULL, 0xd607b8bded4b1a40ULL, 0x0563af89d3a85e48ULL, + 0x3db1b4ad20c21ba4ULL, 0x11f22997b8323b75ULL, 0x292032b34b587e99ULL, + 0x7f1cdace9331681dULL, 0x8e819fc9c0b65affULL, 0xa1e3677fe2d5bb16ULL, + 0xcd33d225ee349da5ULL, 0xd9a2543b85aef898ULL, 0x795e10cbfa0af76dULL, + 0x25a4bbb9992e5d79ULL, 0x78413344677b438eULL, 0xf0826688cef68601ULL, + 0xd27b34bba392f0ebULL, 0x551d8df162fad7bcULL, 0x1e57c511d0d7d9adULL, + 0xdeffbdb171e4d30bULL, 0xf4feea8e802f6caaULL, 0xa480c8f6317de55eULL, + 0xa0fc44f07fa40ff5ULL, 
0x95b5f551c3c9dd1aULL, 0x22f952336d6476eaULL, + 0x0000000000000000ULL, 0xa6be8ef5169f9085ULL, 0xcc2cf1aa73452946ULL, + 0x2e7ddb39bf12550aULL, 0xd526dd3157d8db78ULL, 0x486b2d6c08becf29ULL, + 0x9b0f3a58365d8b21ULL, 0xac78cdfaadd22c15ULL, 0xbc95c7e28891a383ULL, + 0x6a927f5f65dab9c3ULL, 0xc3891d2c1ba0cb9eULL, 0xeaa92f9f50f8b507ULL, + 0xcf0d9426c9d6e87eULL, 0xca6e3baf1a7eb636ULL, 0xab25247059980786ULL, + 0x69b31ad3df4978fbULL, 0xe2512a93cc577c4cULL, 0xff278a0ea61364d9ULL, + 0x71a615c766a53e26ULL, 0x89dc764334fc716cULL, 0xf87a638452594f4aULL, + 0xf2bc208be914f3daULL, 0x8766b94ac1682757ULL, 0xbbc82e687cdb8810ULL, + 0x626a7a53f9757088ULL, 0xa2c202f358467a2eULL, 0x4d0882e5db169161ULL, + 0x09e7268301de7da8ULL, 0xe897699c771ac0dcULL, 0xc8507dac3d9cc3edULL, + 0xc0a878a0a1330aa6ULL, 0x978bb352e42ba8c1ULL, 0xe9884a13ea6b743fULL, + 0x279afdbabecc28a2ULL, 0x047c8c064ed9eaabULL, 0x507e2278b15289f4ULL, + 0x599904fbb08cf45cULL, 0xbd8ae46d15e01760ULL, 0x31353da7f2b43844ULL, + 0x8558ff49e68a528cULL, 0x76fbfc4d92ef15b5ULL, 0x3456922e211c660cULL, + 0x86799ac55c1993b4ULL, 0x3e90d1219a51da9cULL, 0x2d5cbeb505819432ULL, + 0x982e5fd48cce4a19ULL, 0xdb9c1238a24c8d43ULL, 0xd439febecaa96f9bULL, + 0x418c0bef0960b281ULL, 0x158ea591f6ebd1deULL, 0x1f48e69e4da66d4eULL, + 0x8afd13cf8e6fb054ULL, 0xf5e1c9011d5ed849ULL, 0xe34e091c5126c8afULL, + 0xad67ee7530a398f6ULL, 0x43b24dec2e82c75aULL, 0x75da99c1287cd48dULL, + 0x92e81cdb3783f689ULL, 0xa3dd217cc537cecdULL, 0x60543c50de970553ULL, + 0x93f73f54aaf2426aULL, 0xa91b62737e7a725dULL, 0xf19d4507538732e2ULL, + 0x77e4dfc20f9ea156ULL, 0x7d229ccdb4d31dc6ULL, 0x1b346a98037f87e5ULL, + 0xedf4c615a4b29e94ULL, 0x4093286094110662ULL, 0xb0114ee85ae78063ULL, + 0x6ff1d0d6b672e78bULL, 0x6dcf96d591909250ULL, 0xdfe09e3eec9567e8ULL, + 0x3214582b4827f97cULL, 0xb46dc2ee143e6ac8ULL, 0xf6c0ac8da7cd1971ULL, + 0xebb60c10cd8901e4ULL, 0xf7df8f023abcad92ULL, 0x9c52d3d2c217a0b2ULL, + 0x6b8d5cd0f8ab0d20ULL, 0x3777f7a29b8fa734ULL, 0x011f238f9d71b4e3ULL, + 0xc1b75b2f3c42be45ULL, 
0x5de588fdfe551ef7ULL, 0x6eeef3592b035368ULL, + 0xaa3a07ffc4e9b365ULL, 0xecebe59a39c32a77ULL, 0x5ba742f8976e8187ULL, + 0x4b4a48e0b22d0e11ULL, 0xddded83dcb771233ULL, 0xa59feb79ac0c51bdULL, + 0xc7f5912a55792135ULL + }, { + 0x6d6ae04668a9b08aULL, 0x3ab3f04b0be8c743ULL, 0xe51e166b54b3c908ULL, + 0xbe90a9eb35c2f139ULL, 0xb2c7066637f2bec1ULL, 0xaa6945613392202cULL, + 0x9a28c36f3b5201ebULL, 0xddce5a93ab536994ULL, 0x0e34133ef6382827ULL, + 0x52a02ba1ec55048bULL, 0xa2f88f97c4b2a177ULL, 0x8640e513ca2251a5ULL, + 0xcdf1d36258137622ULL, 0xfe6cb708dedf8ddbULL, 0x8a174a9ec8121e5dULL, + 0x679896036b81560eULL, 0x59ed033395795feeULL, 0x1dd778ab8b74edafULL, + 0xee533ef92d9f926dULL, 0x2a8c79baf8a8d8f5ULL, 0x6bcf398e69b119f6ULL, + 0xe20491742fafdd95ULL, 0x276488e0809c2aecULL, 0xea955b82d88f5cceULL, + 0x7102c63a99d9e0c4ULL, 0xf9763017a5c39946ULL, 0x429fa2501f151b3dULL, + 0x4659c72bea05d59eULL, 0x984b7fdccf5a6634ULL, 0xf742232953fbb161ULL, + 0x3041860e08c021c7ULL, 0x747bfd9616cd9386ULL, 0x4bb1367192312787ULL, + 0x1b72a1638a6c44d3ULL, 0x4a0e68a6e8359a66ULL, 0x169a5039f258b6caULL, + 0xb98a2ef44edee5a4ULL, 0xd9083fe85e43a737ULL, 0x967f6ce239624e13ULL, + 0x8874f62d3c1a7982ULL, 0x3c1629830af06e3fULL, 0x9165ebfd427e5a8eULL, + 0xb5dd81794ceeaa5cULL, 0x0de8f15a7834f219ULL, 0x70bd98ede3dd5d25ULL, + 0xaccc9ca9328a8950ULL, 0x56664eda1945ca28ULL, 0x221db34c0f8859aeULL, + 0x26dbd637fa98970dULL, 0x1acdffb4f068f932ULL, 0x4585254f64090fa0ULL, + 0x72de245e17d53afaULL, 0x1546b25d7c546cf4ULL, 0x207e0ffffb803e71ULL, + 0xfaaad2732bcf4378ULL, 0xb462dfae36ea17bdULL, 0xcf926fd1ac1b11fdULL, + 0xe0672dc7dba7ba4aULL, 0xd3fa49ad5d6b41b3ULL, 0x8ba81449b216a3bcULL, + 0x14f9ec8a0650d115ULL, 0x40fc1ee3eb1d7ce2ULL, 0x23a2ed9b758ce44fULL, + 0x782c521b14fddc7eULL, 0x1c68267cf170504eULL, 0xbcf31558c1ca96e6ULL, + 0xa781b43b4ba6d235ULL, 0xf6fd7dfe29ff0c80ULL, 0xb0a4bad5c3fad91eULL, + 0xd199f51ea963266cULL, 0x414340349119c103ULL, 0x5405f269ed4dadf7ULL, + 0xabd61bb649969dcdULL, 0x6813dbeae7bdc3c8ULL, 0x65fb2ab09f8931d1ULL, + 
0xf1e7fae152e3181dULL, 0xc1a67cef5a2339daULL, 0x7a4feea8e0f5bba1ULL, + 0x1e0b9acf05783791ULL, 0x5b8ebf8061713831ULL, 0x80e53cdbcb3af8d9ULL, + 0x7e898bd315e57502ULL, 0xc6bcfbf0213f2d47ULL, 0x95a38e86b76e942dULL, + 0x092e94218d243cbaULL, 0x8339debf453622e7ULL, 0xb11be402b9fe64ffULL, + 0x57d9100d634177c9ULL, 0xcc4e8db52217cbc3ULL, 0x3b0cae9c71ec7aa2ULL, + 0xfb158ca451cbfe99ULL, 0x2b33276d82ac6514ULL, 0x01bf5ed77a04bde1ULL, + 0xc5601994af33f779ULL, 0x75c4a3416cc92e67ULL, 0xf3844652a6eb7fc2ULL, + 0x3487e375fdd0ef64ULL, 0x18ae430704609eedULL, 0x4d14efb993298efbULL, + 0x815a620cb13e4538ULL, 0x125c354207487869ULL, 0x9eeea614ce42cf48ULL, + 0xce2d3106d61fac1cULL, 0xbbe99247bad6827bULL, 0x071a871f7b1c149dULL, + 0x2e4a1cc10db81656ULL, 0x77a71ff298c149b8ULL, 0x06a5d9c80118a97cULL, + 0xad73c27e488e34b1ULL, 0x443a7b981e0db241ULL, 0xe3bbcfa355ab6074ULL, + 0x0af276450328e684ULL, 0x73617a896dd1871bULL, 0x58525de4ef7de20fULL, + 0xb7be3dcab8e6cd83ULL, 0x19111dd07e64230cULL, 0x842359a03e2a367aULL, + 0x103f89f1f3401fb6ULL, 0xdc710444d157d475ULL, 0xb835702334da5845ULL, + 0x4320fc876511a6dcULL, 0xd026abc9d3679b8dULL, 0x17250eee885c0b2bULL, + 0x90dab52a387ae76fULL, 0x31fed8d972c49c26ULL, 0x89cba8fa461ec463ULL, + 0x2ff5421677bcabb7ULL, 0x396f122f85e41d7dULL, 0xa09b332430bac6a8ULL, + 0xc888e8ced7070560ULL, 0xaeaf201ac682ee8fULL, 0x1180d7268944a257ULL, + 0xf058a43628e7a5fcULL, 0xbd4c4b8fbbce2b07ULL, 0xa1246df34abe7b49ULL, + 0x7d5569b79be9af3cULL, 0xa9b5a705bd9efa12ULL, 0xdb6b835baa4bc0e8ULL, + 0x05793bac8f147342ULL, 0x21c1512881848390ULL, 0xfdb0556c50d357e5ULL, + 0x613d4fcb6a99ff72ULL, 0x03dce2648e0cda3eULL, 0xe949b9e6568386f0ULL, + 0xfc0f0bbb2ad7ea04ULL, 0x6a70675913b5a417ULL, 0x7f36d5046fe1c8e3ULL, + 0x0c57af8d02304ff8ULL, 0x32223abdfcc84618ULL, 0x0891caf6f720815bULL, + 0xa63eeaec31a26fd4ULL, 0x2507345374944d33ULL, 0x49d28ac266394058ULL, + 0xf5219f9aa7f3d6beULL, 0x2d96fea583b4cc68ULL, 0x5a31e1571b7585d0ULL, + 0x8ed12fe53d02d0feULL, 0xdfade6205f5b0e4bULL, 0x4cabb16ee92d331aULL, + 
0x04c6657bf510cea3ULL, 0xd73c2cd6a87b8f10ULL, 0xe1d87310a1a307abULL, + 0x6cd5be9112ad0d6bULL, 0x97c032354366f3f2ULL, 0xd4e0ceb22677552eULL, + 0x0000000000000000ULL, 0x29509bde76a402cbULL, 0xc27a9e8bd42fe3e4ULL, + 0x5ef7842cee654b73ULL, 0xaf107ecdbc86536eULL, 0x3fcacbe784fcb401ULL, + 0xd55f90655c73e8cfULL, 0xe6c2f40fdabf1336ULL, 0xe8f6e7312c873b11ULL, + 0xeb2a0555a28be12fULL, 0xe4a148bc2eb774e9ULL, 0x9b979db84156bc0aULL, + 0x6eb60222e6a56ab4ULL, 0x87ffbbc4b026ec44ULL, 0xc703a5275b3b90a6ULL, + 0x47e699fc9001687fULL, 0x9c8d1aa73a4aa897ULL, 0x7cea3760e1ed12ddULL, + 0x4ec80ddd1d2554c5ULL, 0x13e36b957d4cc588ULL, 0x5d2b66486069914dULL, + 0x92b90999cc7280b0ULL, 0x517cc9c56259deb5ULL, 0xc937b619ad03b881ULL, + 0xec30824ad997f5b2ULL, 0xa45d565fc5aa080bULL, 0xd6837201d27f32f1ULL, + 0x635ef3789e9198adULL, 0x531f75769651b96aULL, 0x4f77530a6721e924ULL, + 0x486dd4151c3dfdb9ULL, 0x5f48dafb9461f692ULL, 0x375b011173dc355aULL, + 0x3da9775470f4d3deULL, 0x8d0dcd81b30e0ac0ULL, 0x36e45fc609d888bbULL, + 0x55baacbe97491016ULL, 0x8cb29356c90ab721ULL, 0x76184125e2c5f459ULL, + 0x99f4210bb55edbd5ULL, 0x6f095cf59ca1d755ULL, 0x9f51f8c3b44672a9ULL, + 0x3538bda287d45285ULL, 0x50c39712185d6354ULL, 0xf23b1885dcefc223ULL, + 0x79930ccc6ef9619fULL, 0xed8fdc9da3934853ULL, 0xcb540aaa590bdf5eULL, + 0x5c94389f1a6d2cacULL, 0xe77daad8a0bbaed7ULL, 0x28efc5090ca0bf2aULL, + 0xbf2ff73c4fc64cd8ULL, 0xb37858b14df60320ULL, 0xf8c96ec0dfc724a7ULL, + 0x828680683f329f06ULL, 0x941cd051cd6a29ccULL, 0xc3c5c05cae2b5e05ULL, + 0xb601631dc2e27062ULL, 0xc01922382027843bULL, 0x24b86a840e90f0d2ULL, + 0xd245177a276ffc52ULL, 0x0f8b4de98c3c95c6ULL, 0x3e759530fef809e0ULL, + 0x0b4d2892792c5b65ULL, 0xc4df4743d5374a98ULL, 0xa5e20888bfaeb5eaULL, + 0xba56cc90c0d23f9aULL, 0x38d04cf8ffe0a09cULL, 0x62e1adafe495254cULL, + 0x0263bcb3f40867dfULL, 0xcaeb547d230f62bfULL, 0x6082111c109d4293ULL, + 0xdad4dd8cd04f7d09ULL, 0xefec602e579b2f8cULL, 0x1fb4c4187f7c8a70ULL, + 0xffd3e9dfa4db303aULL, 0x7bf0b07f9af10640ULL, 0xf49ec14dddf76b5fULL, + 
0x8f6e713247066d1fULL, 0x339d646a86ccfbf9ULL, 0x64447467e58d8c30ULL, + 0x2c29a072f9b07189ULL, 0xd8b7613f24471ad6ULL, 0x6627c8d41185ebefULL, + 0xa347d140beb61c96ULL, 0xde12b8f7255fb3aaULL, 0x9d324470404e1576ULL, + 0x9306574eb6763d51ULL, 0xa80af9d2c79a47f3ULL, 0x859c0777442e8b9bULL, + 0x69ac853d9db97e29ULL + }, { + 0xc3407dfc2de6377eULL, 0x5b9e93eea4256f77ULL, 0xadb58fdd50c845e0ULL, + 0x5219ff11a75bed86ULL, 0x356b61cfd90b1de9ULL, 0xfb8f406e25abe037ULL, + 0x7a5a0231c0f60796ULL, 0x9d3cd216e1f5020bULL, 0x0c6550fb6b48d8f3ULL, + 0xf57508c427ff1c62ULL, 0x4ad35ffa71cb407dULL, 0x6290a2da1666aa6dULL, + 0xe284ec2349355f9fULL, 0xb3c307c53d7c84ecULL, 0x05e23c0468365a02ULL, + 0x190bac4d6c9ebfa8ULL, 0x94bbbee9e28b80faULL, 0xa34fc777529cb9b5ULL, + 0xcc7b39f095bcd978ULL, 0x2426addb0ce532e3ULL, 0x7e79329312ce4fc7ULL, + 0xab09a72eebec2917ULL, 0xf8d15499f6b9d6c2ULL, 0x1a55b8babf8c895dULL, + 0xdb8add17fb769a85ULL, 0xb57f2f368658e81bULL, 0x8acd36f18f3f41f6ULL, + 0x5ce3b7bba50f11d3ULL, 0x114dcc14d5ee2f0aULL, 0xb91a7fcded1030e8ULL, + 0x81d5425fe55de7a1ULL, 0xb6213bc1554adeeeULL, 0x80144ef95f53f5f2ULL, + 0x1e7688186db4c10cULL, 0x3b912965db5fe1bcULL, 0xc281715a97e8252dULL, + 0x54a5d7e21c7f8171ULL, 0x4b12535ccbc5522eULL, 0x1d289cefbea6f7f9ULL, + 0x6ef5f2217d2e729eULL, 0xe6a7dc819b0d17ceULL, 0x1b94b41c05829b0eULL, + 0x33d7493c622f711eULL, 0xdcf7f942fa5ce421ULL, 0x600fba8b7f7a8ecbULL, + 0x46b60f011a83988eULL, 0x235b898e0dcf4c47ULL, 0x957ab24f588592a9ULL, + 0x4354330572b5c28cULL, 0xa5f3ef84e9b8d542ULL, 0x8c711e02341b2d01ULL, + 0x0b1874ae6a62a657ULL, 0x1213d8e306fc19ffULL, 0xfe6d7c6a4d9dba35ULL, + 0x65ed868f174cd4c9ULL, 0x88522ea0e6236550ULL, 0x899322065c2d7703ULL, + 0xc01e690bfef4018bULL, 0x915982ed8abddaf8ULL, 0xbe675b98ec3a4e4cULL, + 0xa996bf7f82f00db1ULL, 0xe1daf8d49a27696aULL, 0x2effd5d3dc8986e7ULL, + 0xd153a51f2b1a2e81ULL, 0x18caa0ebd690adfbULL, 0x390e3134b243c51aULL, + 0x2778b92cdff70416ULL, 0x029f1851691c24a6ULL, 0x5e7cafeacc133575ULL, + 0xfa4e4cc89fa5f264ULL, 0x5a5f9f481e2b7d24ULL, 
0x484c47ab18d764dbULL, + 0x400a27f2a1a7f479ULL, 0xaeeb9b2a83da7315ULL, 0x721c626879869734ULL, + 0x042330a2d2384851ULL, 0x85f672fd3765aff0ULL, 0xba446b3a3e02061dULL, + 0x73dd6ecec3888567ULL, 0xffac70ccf793a866ULL, 0xdfa9edb5294ed2d4ULL, + 0x6c6aea7014325638ULL, 0x834a5a0e8c41c307ULL, 0xcdba35562fb2cb2bULL, + 0x0ad97808d06cb404ULL, 0x0f3b440cb85aee06ULL, 0xe5f9c876481f213bULL, + 0x98deee1289c35809ULL, 0x59018bbfcd394bd1ULL, 0xe01bf47220297b39ULL, + 0xde68e1139340c087ULL, 0x9fa3ca4788e926adULL, 0xbb85679c840c144eULL, + 0x53d8f3b71d55ffd5ULL, 0x0da45c5dd146caa0ULL, 0x6f34fe87c72060cdULL, + 0x57fbc315cf6db784ULL, 0xcee421a1fca0fddeULL, 0x3d2d0196607b8d4bULL, + 0x642c8a29ad42c69aULL, 0x14aff010bdd87508ULL, 0xac74837beac657b3ULL, + 0x3216459ad821634dULL, 0x3fb219c70967a9edULL, 0x06bc28f3bb246cf7ULL, + 0xf2082c9126d562c6ULL, 0x66b39278c45ee23cULL, 0xbd394f6f3f2878b9ULL, + 0xfd33689d9e8f8cc0ULL, 0x37f4799eb017394fULL, 0x108cc0b26fe03d59ULL, + 0xda4bd1b1417888d6ULL, 0xb09d1332ee6eb219ULL, 0x2f3ed975668794b4ULL, + 0x58c0871977375982ULL, 0x7561463d78ace990ULL, 0x09876cff037e82f1ULL, + 0x7fb83e35a8c05d94ULL, 0x26b9b58a65f91645ULL, 0xef20b07e9873953fULL, + 0x3148516d0b3355b8ULL, 0x41cb2b541ba9e62aULL, 0x790416c613e43163ULL, + 0xa011d380818e8f40ULL, 0x3a5025c36151f3efULL, 0xd57095bdf92266d0ULL, + 0x498d4b0da2d97688ULL, 0x8b0c3a57353153a5ULL, 0x21c491df64d368e1ULL, + 0x8f2f0af5e7091bf4ULL, 0x2da1c1240f9bb012ULL, 0xc43d59a92ccc49daULL, + 0xbfa6573e56345c1fULL, 0x828b56a8364fd154ULL, 0x9a41f643e0df7cafULL, + 0xbcf843c985266aeaULL, 0x2b1de9d7b4bfdce5ULL, 0x20059d79dedd7ab2ULL, + 0x6dabe6d6ae3c446bULL, 0x45e81bf6c991ae7bULL, 0x6351ae7cac68b83eULL, + 0xa432e32253b6c711ULL, 0xd092a9b991143cd2ULL, 0xcac711032e98b58fULL, + 0xd8d4c9e02864ac70ULL, 0xc5fc550f96c25b89ULL, 0xd7ef8dec903e4276ULL, + 0x67729ede7e50f06fULL, 0xeac28c7af045cf3dULL, 0xb15c1f945460a04aULL, + 0x9cfddeb05bfb1058ULL, 0x93c69abce3a1fe5eULL, 0xeb0380dc4a4bdd6eULL, + 0xd20db1e8f8081874ULL, 0x229a8528b7c15e14ULL, 
0x44291750739fbc28ULL, + 0xd3ccbd4e42060a27ULL, 0xf62b1c33f4ed2a97ULL, 0x86a8660ae4779905ULL, + 0xd62e814a2a305025ULL, 0x477703a7a08d8addULL, 0x7b9b0e977af815c5ULL, + 0x78c51a60a9ea2330ULL, 0xa6adfb733aaae3b7ULL, 0x97e5aa1e3199b60fULL, + 0x0000000000000000ULL, 0xf4b404629df10e31ULL, 0x5564db44a6719322ULL, + 0x9207961a59afec0dULL, 0x9624a6b88b97a45cULL, 0x363575380a192b1cULL, + 0x2c60cd82b595a241ULL, 0x7d272664c1dc7932ULL, 0x7142769faa94a1c1ULL, + 0xa1d0df263b809d13ULL, 0x1630e841d4c451aeULL, 0xc1df65ad44fa13d8ULL, + 0x13d2d445bcf20bacULL, 0xd915c546926abe23ULL, 0x38cf3d92084dd749ULL, + 0xe766d0272103059dULL, 0xc7634d5effde7f2fULL, 0x077d2455012a7ea4ULL, + 0xedbfa82ff16fb199ULL, 0xaf2a978c39d46146ULL, 0x42953fa3c8bbd0dfULL, + 0xcb061da59496a7dcULL, 0x25e7a17db6eb20b0ULL, 0x34aa6d6963050fbaULL, + 0xa76cf7d580a4f1e4ULL, 0xf7ea10954ee338c4ULL, 0xfcf2643b24819e93ULL, + 0xcf252d0746aeef8dULL, 0x4ef06f58a3f3082cULL, 0x563acfb37563a5d7ULL, + 0x5086e740ce47c920ULL, 0x2982f186dda3f843ULL, 0x87696aac5e798b56ULL, + 0x5d22bb1d1f010380ULL, 0x035e14f7d31236f5ULL, 0x3cec0d30da759f18ULL, + 0xf3c920379cdb7095ULL, 0xb8db736b571e22bbULL, 0xdd36f5e44052f672ULL, + 0xaac8ab8851e23b44ULL, 0xa857b3d938fe1fe2ULL, 0x17f1e4e76eca43fdULL, + 0xec7ea4894b61a3caULL, 0x9e62c6e132e734feULL, 0xd4b1991b432c7483ULL, + 0x6ad6c283af163acfULL, 0x1ce9904904a8e5aaULL, 0x5fbda34c761d2726ULL, + 0xf910583f4cb7c491ULL, 0xc6a241f845d06d7cULL, 0x4f3163fe19fd1a7fULL, + 0xe99c988d2357f9c8ULL, 0x8eee06535d0709a7ULL, 0x0efa48aa0254fc55ULL, + 0xb4be23903c56fa48ULL, 0x763f52caabbedf65ULL, 0xeee1bcd8227d876cULL, + 0xe345e085f33b4dccULL, 0x3e731561b369bbbeULL, 0x2843fd2067adea10ULL, + 0x2adce5710eb1ceb6ULL, 0xb7e03767ef44ccbdULL, 0x8db012a48e153f52ULL, + 0x61ceb62dc5749c98ULL, 0xe85d942b9959eb9bULL, 0x4c6f7709caef2c8aULL, + 0x84377e5b8d6bbda3ULL, 0x30895dcbb13d47ebULL, 0x74a04a9bc2a2fbc3ULL, + 0x6b17ce251518289cULL, 0xe438c4d0f2113368ULL, 0x1fb784bed7bad35fULL, + 0x9b80fae55ad16efcULL, 0x77fe5e6c11b0cd36ULL, 
0xc858095247849129ULL, + 0x08466059b97090a2ULL, 0x01c10ca6ba0e1253ULL, 0x6988d6747c040c3aULL, + 0x6849dad2c60a1e69ULL, 0x5147ebe67449db73ULL, 0xc99905f4fd8a837aULL, + 0x991fe2b433cd4a5aULL, 0xf09734c04fc94660ULL, 0xa28ecbd1e892abe6ULL, + 0xf1563866f5c75433ULL, 0x4dae7baf70e13ed9ULL, 0x7ce62ac27bd26b61ULL, + 0x70837a39109ab392ULL, 0x90988e4b30b3c8abULL, 0xb2020b63877296bfULL, + 0x156efcb607d6675bULL + }, { + 0xe63f55ce97c331d0ULL, 0x25b506b0015bba16ULL, 0xc8706e29e6ad9ba8ULL, + 0x5b43d3775d521f6aULL, 0x0bfa3d577035106eULL, 0xab95fc172afb0e66ULL, + 0xf64b63979e7a3276ULL, 0xf58b4562649dad4bULL, 0x48f7c3dbae0c83f1ULL, + 0xff31916642f5c8c5ULL, 0xcbb048dc1c4a0495ULL, 0x66b8f83cdf622989ULL, + 0x35c130e908e2b9b0ULL, 0x7c761a61f0b34fa1ULL, 0x3601161cf205268dULL, + 0x9e54ccfe2219b7d6ULL, 0x8b7d90a538940837ULL, 0x9cd403588ea35d0bULL, + 0xbc3c6fea9ccc5b5aULL, 0xe5ff733b6d24aeedULL, 0xceed22de0f7eb8d2ULL, + 0xec8581cab1ab545eULL, 0xb96105e88ff8e71dULL, 0x8ca03501871a5eadULL, + 0x76ccce65d6db2a2fULL, 0x5883f582a7b58057ULL, 0x3f7be4ed2e8adc3eULL, + 0x0fe7be06355cd9c9ULL, 0xee054e6c1d11be83ULL, 0x1074365909b903a6ULL, + 0x5dde9f80b4813c10ULL, 0x4a770c7d02b6692cULL, 0x5379c8d5d7809039ULL, + 0xb4067448161ed409ULL, 0x5f5e5026183bd6cdULL, 0xe898029bf4c29df9ULL, + 0x7fb63c940a54d09cULL, 0xc5171f897f4ba8bcULL, 0xa6f28db7b31d3d72ULL, + 0x2e4f3be7716eaa78ULL, 0x0d6771a099e63314ULL, 0x82076254e41bf284ULL, + 0x2f0fd2b42733df98ULL, 0x5c9e76d3e2dc49f0ULL, 0x7aeb569619606cdbULL, + 0x83478b07b2468764ULL, 0xcfadcb8d5923cd32ULL, 0x85dac7f05b95a41eULL, + 0xb5469d1b4043a1e9ULL, 0xb821ecbbd9a592fdULL, 0x1b8e0b0e798c13c8ULL, + 0x62a57b6d9a0be02eULL, 0xfcf1b793b81257f8ULL, 0x9d94ea0bd8fe28ebULL, + 0x4cea408aeb654a56ULL, 0x23284a47e888996cULL, 0x2d8f1d128b893545ULL, + 0xf4cbac3132c0d8abULL, 0xbd7c86b9ca912ebaULL, 0x3a268eef3dbe6079ULL, + 0xf0d62f6077a9110cULL, 0x2735c916ade150cbULL, 0x89fd5f03942ee2eaULL, + 0x1acee25d2fd16628ULL, 0x90f39bab41181bffULL, 0x430dfe8cde39939fULL, + 0xf70b8ac4c8274796ULL, 
0x1c53aeaac6024552ULL, 0x13b410acf35e9c9bULL, + 0xa532ab4249faa24fULL, 0x2b1251e5625a163fULL, 0xd7e3e676da4841c7ULL, + 0xa7b264e4e5404892ULL, 0xda8497d643ae72d3ULL, 0x861ae105a1723b23ULL, + 0x38a6414991048aa4ULL, 0x6578dec92585b6b4ULL, 0x0280cfa6acbaeaddULL, + 0x88bdb650c273970aULL, 0x9333bd5ebbff84c2ULL, 0x4e6a8f2c47dfa08bULL, + 0x321c954db76cef2aULL, 0x418d312a72837942ULL, 0xb29b38bfffcdf773ULL, + 0x6c022c38f90a4c07ULL, 0x5a033a240b0f6a8aULL, 0x1f93885f3ce5da6fULL, + 0xc38a537e96988bc6ULL, 0x39e6a81ac759ff44ULL, 0x29929e43cee0fce2ULL, + 0x40cdd87924de0ca2ULL, 0xe9d8ebc8a29fe819ULL, 0x0c2798f3cfbb46f4ULL, + 0x55e484223e53b343ULL, 0x4650948ecd0d2fd8ULL, 0x20e86cb2126f0651ULL, + 0x6d42c56baf5739e7ULL, 0xa06fc1405ace1e08ULL, 0x7babbfc54f3d193bULL, + 0x424d17df8864e67fULL, 0xd8045870ef14980eULL, 0xc6d7397c85ac3781ULL, + 0x21a885e1443273b1ULL, 0x67f8116f893f5c69ULL, 0x24f5efe35706cff6ULL, + 0xd56329d076f2ab1aULL, 0x5e1eb9754e66a32dULL, 0x28d2771098bd8902ULL, + 0x8f6013f47dfdc190ULL, 0x17a993fdb637553cULL, 0xe0a219397e1012aaULL, + 0x786b9930b5da8606ULL, 0x6e82e39e55b0a6daULL, 0x875a0856f72f4ec3ULL, + 0x3741ff4fa458536dULL, 0xac4859b3957558fcULL, 0x7ef6d5c75c09a57cULL, + 0xc04a758b6c7f14fbULL, 0xf9acdd91ab26ebbfULL, 0x7391a467c5ef9668ULL, + 0x335c7c1ee1319acaULL, 0xa91533b18641e4bbULL, 0xe4bf9a683b79db0dULL, + 0x8e20faa72ba0b470ULL, 0x51f907737b3a7ae4ULL, 0x2268a314bed5ec8cULL, + 0xd944b123b949edeeULL, 0x31dcb3b84d8b7017ULL, 0xd3fe65279f218860ULL, + 0x097af2f1dc8ffab3ULL, 0x9b09a6fc312d0b91ULL, 0xcc6ded78a3c4520fULL, + 0x3481d9ba5ebfcc50ULL, 0x4f2a667f1182d56bULL, 0xdfd9fdd4509ace94ULL, + 0x26752045fbbc252bULL, 0xbffc491f662bc467ULL, 0xdd593272fc202449ULL, + 0x3cbbc218d46d4303ULL, 0x91b372f817456e1fULL, 0x681faf69bc6385a0ULL, + 0xb686bbeebaa43ed4ULL, 0x1469b5084cd0ca01ULL, 0x98c98009cbca94acULL, + 0x6438379a73d8c354ULL, 0xc2caba2dc0c5fe26ULL, 0x3e3b0dbe78d7a9deULL, + 0x50b9ee202d670f04ULL, 0x4590b27b37eab0e5ULL, 0x6025b4cb36b10af3ULL, + 0xfb2c1237079c0162ULL, 
0xa12f28130c936be8ULL, 0x4b37e52e54eb1cccULL, + 0x083a1ba28ad28f53ULL, 0xc10a9cd83a22611bULL, 0x9f1425ad7444c236ULL, + 0x069d4cf7e9d3237aULL, 0xedc56899e7f621beULL, 0x778c273680865fcfULL, + 0x309c5aeb1bd605f7ULL, 0x8de0dc52d1472b4dULL, 0xf8ec34c2fd7b9e5fULL, + 0xea18cd3d58787724ULL, 0xaad515447ca67b86ULL, 0x9989695a9d97e14cULL, + 0x0000000000000000ULL, 0xf196c63321f464ecULL, 0x71116bc169557cb5ULL, + 0xaf887f466f92c7c1ULL, 0x972e3e0ffe964d65ULL, 0x190ec4a8d536f915ULL, + 0x95aef1a9522ca7b8ULL, 0xdc19db21aa7d51a9ULL, 0x94ee18fa0471d258ULL, + 0x8087adf248a11859ULL, 0xc457f6da2916dd5cULL, 0xfa6cfb6451c17482ULL, + 0xf256e0c6db13fbd1ULL, 0x6a9f60cf10d96f7dULL, 0x4daaa9d9bd383fb6ULL, + 0x03c026f5fae79f3dULL, 0xde99148706c7bb74ULL, 0x2a52b8b6340763dfULL, + 0x6fc20acd03edd33aULL, 0xd423c08320afdefaULL, 0xbbe1ca4e23420dc0ULL, + 0x966ed75ca8cb3885ULL, 0xeb58246e0e2502c4ULL, 0x055d6a021334bc47ULL, + 0xa47242111fa7d7afULL, 0xe3623fcc84f78d97ULL, 0x81c744a11efc6db9ULL, + 0xaec8961539cfb221ULL, 0xf31609958d4e8e31ULL, 0x63e5923ecc5695ceULL, + 0x47107ddd9b505a38ULL, 0xa3afe7b5a0298135ULL, 0x792b7063e387f3e6ULL, + 0x0140e953565d75e0ULL, 0x12f4f9ffa503e97bULL, 0x750ce8902c3cb512ULL, + 0xdbc47e8515f30733ULL, 0x1ed3610c6ab8af8fULL, 0x5239218681dde5d9ULL, + 0xe222d69fd2aaf877ULL, 0xfe71783514a8bd25ULL, 0xcaf0a18f4a177175ULL, + 0x61655d9860ec7f13ULL, 0xe77fbc9dc19e4430ULL, 0x2ccff441ddd440a5ULL, + 0x16e97aaee06a20dcULL, 0xa855dae2d01c915bULL, 0x1d1347f9905f30b2ULL, + 0xb7c652bdecf94b34ULL, 0xd03e43d265c6175dULL, 0xfdb15ec0ee4f2218ULL, + 0x57644b8492e9599eULL, 0x07dda5a4bf8e569aULL, 0x54a46d71680ec6a3ULL, + 0x5624a2d7c4b42c7eULL, 0xbebca04c3076b187ULL, 0x7d36f332a6ee3a41ULL, + 0x3b6667bc6be31599ULL, 0x695f463aea3ef040ULL, 0xad08b0e0c3282d1cULL, + 0xb15b1e4a052a684eULL, 0x44d05b2861b7c505ULL, 0x15295c5b1a8dbfe1ULL, + 0x744c01c37a61c0f2ULL, 0x59c31cd1f1e8f5b7ULL, 0xef45a73f4b4ccb63ULL, + 0x6bdf899c46841a9dULL, 0x3dfb2b4b823036e3ULL, 0xa2ef0ee6f674f4d5ULL, + 0x184e2dfb836b8cf5ULL, 
0x1134df0a5fe47646ULL, 0xbaa1231d751f7820ULL, + 0xd17eaa81339b62bdULL, 0xb01bf71953771daeULL, 0x849a2ea30dc8d1feULL, + 0x705182923f080955ULL, 0x0ea757556301ac29ULL, 0x041d83514569c9a7ULL, + 0x0abad4042668658eULL, 0x49b72a88f851f611ULL, 0x8a3d79f66ec97dd7ULL, + 0xcd2d042bf59927efULL, 0xc930877ab0f0ee48ULL, 0x9273540deda2f122ULL, + 0xc797d02fd3f14261ULL, 0xe1e2f06a284d674aULL, 0xd2be8c74c97cfd80ULL, + 0x9a494faf67707e71ULL, 0xb3dbd1eca9908293ULL, 0x72d14d3493b2e388ULL, + 0xd6a30f258c153427ULL + } +}; /* Ax */ + +static void streebog_xor(const struct streebog_uint512 *x, + const struct streebog_uint512 *y, + struct streebog_uint512 *z) +{ + z->qword[0] = x->qword[0] ^ y->qword[0]; + z->qword[1] = x->qword[1] ^ y->qword[1]; + z->qword[2] = x->qword[2] ^ y->qword[2]; + z->qword[3] = x->qword[3] ^ y->qword[3]; + z->qword[4] = x->qword[4] ^ y->qword[4]; + z->qword[5] = x->qword[5] ^ y->qword[5]; + z->qword[6] = x->qword[6] ^ y->qword[6]; + z->qword[7] = x->qword[7] ^ y->qword[7]; +} + +static void streebog_xlps(const struct streebog_uint512 *x, + const struct streebog_uint512 *y, + struct streebog_uint512 *data) +{ + u64 r0, r1, r2, r3, r4, r5, r6, r7; + int i; + + r0 = le64_to_cpu(x->qword[0] ^ y->qword[0]); + r1 = le64_to_cpu(x->qword[1] ^ y->qword[1]); + r2 = le64_to_cpu(x->qword[2] ^ y->qword[2]); + r3 = le64_to_cpu(x->qword[3] ^ y->qword[3]); + r4 = le64_to_cpu(x->qword[4] ^ y->qword[4]); + r5 = le64_to_cpu(x->qword[5] ^ y->qword[5]); + r6 = le64_to_cpu(x->qword[6] ^ y->qword[6]); + r7 = le64_to_cpu(x->qword[7] ^ y->qword[7]); + + for (i = 0; i <= 7; i++) { + data->qword[i] = cpu_to_le64(Ax[0][r0 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[1][r1 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[2][r2 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[3][r3 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[4][r4 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[5][r5 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[6][r6 & 0xFF]); + data->qword[i] ^= cpu_to_le64(Ax[7][r7 & 0xFF]); + r0 >>= 
8;
+		r1 >>= 8;
+		r2 >>= 8;
+		r3 >>= 8;
+		r4 >>= 8;
+		r5 >>= 8;
+		r6 >>= 8;
+		r7 >>= 8;
+	}
+}
+
+/*
+ * One round of the block transform: derive the next round key from the
+ * current key and round constant C[i] (in place), then mix that key into
+ * @data (in place).
+ */
+static void streebog_round(int i, struct streebog_uint512 *Ki,
+			   struct streebog_uint512 *data)
+{
+	streebog_xlps(Ki, &C[i], Ki);
+	streebog_xlps(Ki, data, data);
+}
+
+/*
+ * shash .init: reset the whole hashing state to zero.  For the 256-bit
+ * digest every byte of the chaining value h is then set to 0x01; for the
+ * 512-bit digest h stays all-zero.  Always returns 0.
+ */
+static int streebog_init(struct shash_desc *desc)
+{
+	struct streebog_state *ctx = shash_desc_ctx(desc);
+	unsigned int i;
+
+	memset(ctx, 0, sizeof(struct streebog_state));
+
+	/* The digest size cannot change while filling h: test it once. */
+	if (crypto_shash_digestsize(desc->tfm) == STREEBOG256_DIGEST_SIZE) {
+		/*
+		 * qword[] is read with le64_to_cpu() everywhere else, so
+		 * store it as little-endian too.  The pattern is the same
+		 * in either byte order; this only keeps the types honest.
+		 */
+		for (i = 0; i < 8; i++)
+			ctx->h.qword[i] = cpu_to_le64(0x0101010101010101ULL);
+	}
+	return 0;
+}
+
+/*
+ * Pad a partially filled buffer: zero everything after the buffered data
+ * and put a single 0x01 byte right behind it.  A full buffer is left alone.
+ */
+static void streebog_pad(struct streebog_state *ctx)
+{
+	if (ctx->fillsize >= STREEBOG_BLOCK_SIZE)
+		return;
+
+	memset(ctx->buffer + ctx->fillsize, 0,
+	       sizeof(ctx->buffer) - ctx->fillsize);
+
+	ctx->buffer[ctx->fillsize] = 1;
+}
+
+/*
+ * r = x + y, where each operand is eight little-endian 64-bit limbs with
+ * limb 0 the least significant.  The carry out of the top limb is dropped.
+ * r may alias x or y: each limb is read before it is written.
+ */
+static void streebog_add512(const struct streebog_uint512 *x,
+			    const struct streebog_uint512 *y,
+			    struct streebog_uint512 *r)
+{
+	u64 carry = 0;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		const u64 left = le64_to_cpu(x->qword[i]);
+		u64 sum;
+
+		sum = left + le64_to_cpu(y->qword[i]) + carry;
+		/*
+		 * sum == left means y + carry wrapped to exactly 0: either
+		 * both were 0 (carry stays 0) or y was all-ones with carry
+		 * 1 (carry stays 1).  Otherwise a wrap shows as sum < left.
+		 */
+		if (sum != left)
+			carry = (sum < left);
+		r->qword[i] = cpu_to_le64(sum);
+	}
+}
+
+/*
+ * Compression function: update the chaining value @h in place from the
+ * counter/constant @N and the message block @m.
+ *
+ * @m is read as a struct streebog_uint512 (eight 64-bit words), so it must
+ * point to a full block that is suitably aligned for u64 access.  Callers
+ * holding a plain byte pointer must copy the block into a
+ * struct streebog_uint512 first.
+ */
+static void streebog_g(struct streebog_uint512 *h,
+		       const struct streebog_uint512 *N,
+		       const u8 *m)
+{
+	struct streebog_uint512 Ki, data;
+	unsigned int i;
+
+	streebog_xlps(h, N, &data);
+
+	/* Starting E() */
+	Ki = data;
+	streebog_xlps(&Ki, (const struct streebog_uint512 *)&m[0], &data);
+
+	for (i = 0; i < 11; i++)
+		streebog_round(i, &Ki, &data);
+
+	streebog_xlps(&Ki, &C[11], &Ki);
+	streebog_xor(&Ki, &data, &data);
+	/* E() done */
+
+	/* Feed forward: h = E(...) ^ h ^ m */
+	streebog_xor(&data, h, &data);
+	streebog_xor(&data, (const struct streebog_uint512 *)&m[0], h);
+}
+
+/*
+ * Absorb one full message block at @data: compress it into h, advance the
+ * counter N by buffer512 (defined earlier in this file; presumably the
+ * block length in bits - confirm there) and add the block into the
+ * checksum Sigma.
+ *
+ * @data comes straight from the caller of streebog_update() and may have
+ * any alignment, so it is copied into an aligned local before it is read
+ * as 64-bit words.  Casting it directly would be an unaligned access on
+ * CPUs that do not support those.
+ */
+static void streebog_stage2(struct streebog_state *ctx, const u8 *data)
+{
+	struct streebog_uint512 m;
+
+	memcpy(&m, data, sizeof(m));
+
+	streebog_g(&ctx->h, &ctx->N, (const u8 *)&m);
+
+	streebog_add512(&ctx->N, &buffer512, &ctx->N);
+	streebog_add512(&ctx->Sigma, &m, &ctx->Sigma);
+}
+
+/*
+ * Finalisation: pad and absorb the buffered tail, then fold the counter N
+ * and the checksum Sigma into h and publish the result in ctx->hash.
+ */
+static void streebog_stage3(struct streebog_state *ctx)
+{
+	struct streebog_uint512 buf = { { 0 } };
+	struct streebog_uint512 m;
+
+	/* Length of the tail in bits; must be taken before padding. */
+	buf.qword[0] = cpu_to_le64(ctx->fillsize << 3);
+	streebog_pad(ctx);
+
+	/*
+	 * ctx->buffer is filled and padded bytewise; copy it into an
+	 * aligned block rather than relying on its placement inside the
+	 * state for 64-bit loads.
+	 */
+	memcpy(&m, ctx->buffer, sizeof(m));
+
+	streebog_g(&ctx->h, &ctx->N, (const u8 *)&m);
+	streebog_add512(&ctx->N, &buf, &ctx->N);
+	streebog_add512(&ctx->Sigma, &m, &ctx->Sigma);
+	streebog_g(&ctx->h, &buffer0, (const u8 *)&ctx->N);
+	streebog_g(&ctx->h, &buffer0, (const u8 *)&ctx->Sigma);
+	memcpy(&ctx->hash, &ctx->h, sizeof(struct streebog_uint512));
+}
+
+/*
+ * shash .update: feed @len bytes at @data into the hash.
+ *
+ * Bytes are first used to top up a partially filled ctx->buffer; every
+ * complete STREEBOG_BLOCK_SIZE block is absorbed with streebog_stage2();
+ * whatever is left (less than one block) is kept in ctx->buffer for the
+ * next call or for finalisation.  Always returns 0.
+ */
+static int streebog_update(struct shash_desc *desc, const u8 *data,
+			   unsigned int len)
+{
+	struct streebog_state *ctx = shash_desc_ctx(desc);
+	size_t chunksize;
+
+	if (ctx->fillsize) {
+		/* Complete the pending block as far as the input allows. */
+		chunksize = STREEBOG_BLOCK_SIZE - ctx->fillsize;
+		if (chunksize > len)
+			chunksize = len;
+		memcpy(&ctx->buffer[ctx->fillsize], data, chunksize);
+		ctx->fillsize += chunksize;
+		len -= chunksize;
+		data += chunksize;
+
+		if (ctx->fillsize == STREEBOG_BLOCK_SIZE) {
+			streebog_stage2(ctx, ctx->buffer);
+			ctx->fillsize = 0;
+		}
+	}
+
+	/* Whole blocks are absorbed directly from the caller's memory. */
+	while (len >= STREEBOG_BLOCK_SIZE) {
+		streebog_stage2(ctx, data);
+		data += STREEBOG_BLOCK_SIZE;
+		len -= STREEBOG_BLOCK_SIZE;
+	}
+
+	/*
+	 * Stash the remainder.  fillsize is 0 here whenever len != 0: a
+	 * still-partial buffer above means the input was used up.
+	 */
+	if (len) {
+		memcpy(&ctx->buffer, data, len);
+		ctx->fillsize = len;
+	}
+	return 0;
+}
+
+/*
+ * shash .final: finish the hash and write the digest to @digest.
+ *
+ * The 256-bit digest is the upper half of the 512-bit result
+ * (qword[4..7]); the 512-bit digest is the whole result.  Always returns 0.
+ */
+static int streebog_final(struct shash_desc *desc, u8 *digest)
+{
+	struct streebog_state *ctx = shash_desc_ctx(desc);
+
+	streebog_stage3(ctx);
+	ctx->fillsize = 0;
+	if (crypto_shash_digestsize(desc->tfm) == STREEBOG256_DIGEST_SIZE)
+		memcpy(digest, &ctx->hash.qword[4], STREEBOG256_DIGEST_SIZE);
+	else
+		memcpy(digest, &ctx->hash.qword[0], STREEBOG512_DIGEST_SIZE);
+	return 0;
+}
+
+static struct shash_alg algs[2] = { {
+	.digestsize	=	STREEBOG256_DIGEST_SIZE,
+	.init		=	streebog_init,
+	.update		=	streebog_update,
+	.final		=	streebog_final,
+	.descsize	=	sizeof(struct streebog_state),
+
.base = { + .cra_name = "streebog256", + .cra_driver_name = "streebog256-generic", + .cra_blocksize = STREEBOG_BLOCK_SIZE, + .cra_module = THIS_MODULE, + }, +}, { + .digestsize = STREEBOG512_DIGEST_SIZE, + .init = streebog_init, + .update = streebog_update, + .final = streebog_final, + .descsize = sizeof(struct streebog_state), + .base = { + .cra_name = "streebog512", + .cra_driver_name = "streebog512-generic", + .cra_blocksize = STREEBOG_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init streebog_mod_init(void) +{ + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit streebog_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_init(streebog_mod_init); +module_exit(streebog_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Vitaly Chikunov "); +MODULE_DESCRIPTION("Streebog Hash Function"); + +MODULE_ALIAS_CRYPTO("streebog256"); +MODULE_ALIAS_CRYPTO("streebog256-generic"); +MODULE_ALIAS_CRYPTO("streebog512"); +MODULE_ALIAS_CRYPTO("streebog512-generic"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index c20c9f5c18f2..e7fb87e114a5 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -76,10 +76,12 @@ static char *check[] = { "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320", - "lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL + "lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", + "streebog256", "streebog512", + NULL }; -static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 }; +static u32 block_sizes[] = { 16, 64, 256, 1024, 1472, 8192, 0 }; static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 }; #define XBUFSIZE 8 @@ -1736,6 +1738,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("ctr(aes)"); ret += tcrypt_test("rfc3686(ctr(aes))"); 
ret += tcrypt_test("ofb(aes)"); + ret += tcrypt_test("cfb(aes)"); break; case 11: @@ -1913,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("sm3"); break; + case 53: + ret += tcrypt_test("streebog256"); + break; + + case 54: + ret += tcrypt_test("streebog512"); + break; + case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1969,6 +1980,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("hmac(sha3-512)"); break; + case 115: + ret += tcrypt_test("hmac(streebog256)"); + break; + + case 116: + ret += tcrypt_test("hmac(streebog512)"); + break; + case 150: ret += tcrypt_test("ansi_cprng"); break; @@ -2060,6 +2079,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16_24_32); test_cipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0, speed_template_16_24_32); + test_cipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0, + speed_template_16_24_32); + test_cipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0, + speed_template_16_24_32); break; case 201: @@ -2297,6 +2320,18 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); break; + + case 219: + test_cipher_speed("adiantum(xchacha12,aes)", ENCRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha12,aes)", DECRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha20,aes)", ENCRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha20,aes)", DECRYPT, sec, NULL, + 0, speed_template_32); + break; + case 300: if (alg) { test_hash_speed(alg, sec, generic_hash_speed_template); @@ -2407,6 +2442,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("sm3", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; /* fall through */ + case 327: + test_hash_speed("streebog256", 
sec, + generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + /* fall through */ + case 328: + test_hash_speed("streebog512", sec, + generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + /* fall through */ case 399: break; @@ -2520,6 +2565,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) num_mb); if (mode > 400 && mode < 500) break; /* fall through */ + case 426: + test_mb_ahash_speed("streebog256", sec, + generic_hash_speed_template, num_mb); + if (mode > 400 && mode < 500) break; + /* fall through */ + case 427: + test_mb_ahash_speed("streebog512", sec, + generic_hash_speed_template, num_mb); + if (mode > 400 && mode < 500) break; + /* fall through */ case 499: break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index b1f79c6bf409..0f684a414acb 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2404,6 +2404,18 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep this list sorted by algorithm name. 
*/ static const struct alg_test_desc alg_test_descs[] = { { + .alg = "adiantum(xchacha12,aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(adiantum_xchacha12_aes_tv_template) + }, + }, { + .alg = "adiantum(xchacha20,aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(adiantum_xchacha20_aes_tv_template) + }, + }, { .alg = "aegis128", .test = alg_test_aead, .suite = { @@ -2690,6 +2702,13 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(aes_ccm_dec_tv_template) } } + }, { + .alg = "cfb(aes)", + .test = alg_test_skcipher, + .fips_allowed = 1, + .suite = { + .cipher = __VECS(aes_cfb_tv_template) + }, }, { .alg = "chacha20", .test = alg_test_skcipher, @@ -2805,6 +2824,7 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "cts(cbc(aes))", .test = alg_test_skcipher, + .fips_allowed = 1, .suite = { .cipher = __VECS(cts_mode_tv_template) } @@ -3184,6 +3204,18 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(hmac_sha512_tv_template) } + }, { + .alg = "hmac(streebog256)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(hmac_streebog256_tv_template) + } + }, { + .alg = "hmac(streebog512)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(hmac_streebog512_tv_template) + } }, { .alg = "jitterentropy_rng", .fips_allowed = 1, @@ -3291,6 +3323,12 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(morus640_dec_tv_template), } } + }, { + .alg = "nhpoly1305", + .test = alg_test_hash, + .suite = { + .hash = __VECS(nhpoly1305_tv_template) + } }, { .alg = "ofb(aes)", .test = alg_test_skcipher, @@ -3496,6 +3534,18 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(sm3_tv_template) } + }, { + .alg = "streebog256", + .test = alg_test_hash, + .suite = { + .hash = __VECS(streebog256_tv_template) + } + }, { + .alg = "streebog512", + .test = alg_test_hash, + .suite = { + .hash = __VECS(streebog512_tv_template) + } }, { 
.alg = "tgr128", .test = alg_test_hash, @@ -3544,6 +3594,18 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xchacha12", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(xchacha12_tv_template) + }, + }, { + .alg = "xchacha20", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(xchacha20_tv_template) + }, }, { .alg = "xts(aes)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 1fe7b97ba03f..e8f47d7b92cd 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -27,7 +27,7 @@ #define MAX_DIGEST_SIZE 64 #define MAX_TAP 8 -#define MAX_KEYLEN 160 +#define MAX_KEYLEN 1088 #define MAX_IVLEN 32 struct hash_testvec { @@ -35,10 +35,10 @@ struct hash_testvec { const char *key; const char *plaintext; const char *digest; - unsigned char tap[MAX_TAP]; + unsigned short tap[MAX_TAP]; + unsigned short np; unsigned short psize; - unsigned char np; - unsigned char ksize; + unsigned short ksize; }; /* @@ -2307,6 +2307,122 @@ static const struct hash_testvec crct10dif_tv_template[] = { } }; +/* + * Streebog test vectors from RFC 6986 and GOST R 34.11-2012 + */ +static const struct hash_testvec streebog256_tv_template[] = { + { /* M1 */ + .plaintext = "012345678901234567890123456789012345678901234567890123456789012", + .psize = 63, + .digest = + "\x9d\x15\x1e\xef\xd8\x59\x0b\x89" + "\xda\xa6\xba\x6c\xb7\x4a\xf9\x27" + "\x5d\xd0\x51\x02\x6b\xb1\x49\xa4" + "\x52\xfd\x84\xe5\xe5\x7b\x55\x00", + }, + { /* M2 */ + .plaintext = + "\xd1\xe5\x20\xe2\xe5\xf2\xf0\xe8" + "\x2c\x20\xd1\xf2\xf0\xe8\xe1\xee" + "\xe6\xe8\x20\xe2\xed\xf3\xf6\xe8" + "\x2c\x20\xe2\xe5\xfe\xf2\xfa\x20" + "\xf1\x20\xec\xee\xf0\xff\x20\xf1" + "\xf2\xf0\xe5\xeb\xe0\xec\xe8\x20" + "\xed\xe0\x20\xf5\xf0\xe0\xe1\xf0" + "\xfb\xff\x20\xef\xeb\xfa\xea\xfb" + "\x20\xc8\xe3\xee\xf0\xe5\xe2\xfb", + .psize = 72, + .digest = + "\x9d\xd2\xfe\x4e\x90\x40\x9e\x5d" + "\xa8\x7f\x53\x97\x6d\x74\x05\xb0" + 
"\xc0\xca\xc6\x28\xfc\x66\x9a\x74" + "\x1d\x50\x06\x3c\x55\x7e\x8f\x50", + }, +}; + +static const struct hash_testvec streebog512_tv_template[] = { + { /* M1 */ + .plaintext = "012345678901234567890123456789012345678901234567890123456789012", + .psize = 63, + .digest = + "\x1b\x54\xd0\x1a\x4a\xf5\xb9\xd5" + "\xcc\x3d\x86\xd6\x8d\x28\x54\x62" + "\xb1\x9a\xbc\x24\x75\x22\x2f\x35" + "\xc0\x85\x12\x2b\xe4\xba\x1f\xfa" + "\x00\xad\x30\xf8\x76\x7b\x3a\x82" + "\x38\x4c\x65\x74\xf0\x24\xc3\x11" + "\xe2\xa4\x81\x33\x2b\x08\xef\x7f" + "\x41\x79\x78\x91\xc1\x64\x6f\x48", + }, + { /* M2 */ + .plaintext = + "\xd1\xe5\x20\xe2\xe5\xf2\xf0\xe8" + "\x2c\x20\xd1\xf2\xf0\xe8\xe1\xee" + "\xe6\xe8\x20\xe2\xed\xf3\xf6\xe8" + "\x2c\x20\xe2\xe5\xfe\xf2\xfa\x20" + "\xf1\x20\xec\xee\xf0\xff\x20\xf1" + "\xf2\xf0\xe5\xeb\xe0\xec\xe8\x20" + "\xed\xe0\x20\xf5\xf0\xe0\xe1\xf0" + "\xfb\xff\x20\xef\xeb\xfa\xea\xfb" + "\x20\xc8\xe3\xee\xf0\xe5\xe2\xfb", + .psize = 72, + .digest = + "\x1e\x88\xe6\x22\x26\xbf\xca\x6f" + "\x99\x94\xf1\xf2\xd5\x15\x69\xe0" + "\xda\xf8\x47\x5a\x3b\x0f\xe6\x1a" + "\x53\x00\xee\xe4\x6d\x96\x13\x76" + "\x03\x5f\xe8\x35\x49\xad\xa2\xb8" + "\x62\x0f\xcd\x7c\x49\x6c\xe5\xb3" + "\x3f\x0c\xb9\xdd\xdc\x2b\x64\x60" + "\x14\x3b\x03\xda\xba\xc9\xfb\x28", + }, +}; + +/* + * Two HMAC-Streebog test vectors from RFC 7836 and R 50.1.113-2016 A + */ +static const struct hash_testvec hmac_streebog256_tv_template[] = { + { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .ksize = 32, + .plaintext = + "\x01\x26\xbd\xb8\x78\x00\xaf\x21" + "\x43\x41\x45\x65\x63\x78\x01\x00", + .psize = 16, + .digest = + "\xa1\xaa\x5f\x7d\xe4\x02\xd7\xb3" + "\xd3\x23\xf2\x99\x1c\x8d\x45\x34" + "\x01\x31\x37\x01\x0a\x83\x75\x4f" + "\xd0\xaf\x6d\x7c\xd4\x92\x2e\xd9", + }, +}; + +static const struct hash_testvec hmac_streebog512_tv_template[] = { + { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + 
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .ksize = 32, + .plaintext = + "\x01\x26\xbd\xb8\x78\x00\xaf\x21" + "\x43\x41\x45\x65\x63\x78\x01\x00", + .psize = 16, + .digest = + "\xa5\x9b\xab\x22\xec\xae\x19\xc6" + "\x5f\xbd\xe6\xe5\xf4\xe9\xf5\xd8" + "\x54\x9d\x31\xf0\x37\xf9\xdf\x9b" + "\x90\x55\x00\xe1\x71\x92\x3a\x77" + "\x3d\x5f\x15\x30\xf2\xed\x7e\x96" + "\x4c\xb2\xee\xdc\x29\xe9\xad\x2f" + "\x3a\xfe\x93\xb2\x81\x4f\x79\xf5" + "\x00\x0f\xfc\x03\x66\xc2\x51\xe6", + }, +}; + /* Example vectors below taken from * http://www.oscca.gov.cn/UpFile/20101222141857786.pdf * @@ -5593,6 +5709,1238 @@ static const struct hash_testvec poly1305_tv_template[] = { }, }; +/* NHPoly1305 test vectors from https://github.com/google/adiantum */ +static const struct hash_testvec nhpoly1305_tv_template[] = { + { + .key = "\xd2\x5d\x4c\xdd\x8d\x2b\x7f\x7a" + "\xd9\xbe\x71\xec\xd1\x83\x52\xe3" + "\xe1\xad\xd7\x5c\x0a\x75\x9d\xec" + "\x1d\x13\x7e\x5d\x71\x07\xc9\xe4" + "\x57\x2d\x44\x68\xcf\xd8\xd6\xc5" + "\x39\x69\x7d\x32\x75\x51\x4f\x7e" + "\xb2\x4c\xc6\x90\x51\x6e\xd9\xd6" + "\xa5\x8b\x2d\xf1\x94\xf9\xf7\x5e" + "\x2c\x84\x7b\x41\x0f\x88\x50\x89" + "\x30\xd9\xa1\x38\x46\x6c\xc0\x4f" + "\xe8\xdf\xdc\x66\xab\x24\x43\x41" + "\x91\x55\x29\x65\x86\x28\x5e\x45" + "\xd5\x2d\xb7\x80\x08\x9a\xc3\xd4" + "\x9a\x77\x0a\xd4\xef\x3e\xe6\x3f" + "\x6f\x2f\x9b\x3a\x7d\x12\x1e\x80" + "\x6c\x44\xa2\x25\xe1\xf6\x60\xe9" + "\x0d\xaf\xc5\x3c\xa5\x79\xae\x64" + "\xbc\xa0\x39\xa3\x4d\x10\xe5\x4d" + "\xd5\xe7\x89\x7a\x13\xee\x06\x78" + "\xdc\xa4\xdc\x14\x27\xe6\x49\x38" + "\xd0\xe0\x45\x25\x36\xc5\xf4\x79" + "\x2e\x9a\x98\x04\xe4\x2b\x46\x52" + "\x7c\x33\xca\xe2\x56\x51\x50\xe2" + "\xa5\x9a\xae\x18\x6a\x13\xf8\xd2" + "\x21\x31\x66\x02\xe2\xda\x8d\x7e" + "\x41\x19\xb2\x61\xee\x48\x8f\xf1" + "\x65\x24\x2e\x1e\x68\xce\x05\xd9" + "\x2a\xcf\xa5\x3a\x57\xdd\x35\x91" + "\x93\x01\xca\x95\xfc\x2b\x36\x04" + "\xe6\x96\x97\x28\xf6\x31\xfe\xa3" + 
"\x9d\xf6\x6a\x1e\x80\x8d\xdc\xec" + "\xaf\x66\x11\x13\x02\x88\xd5\x27" + "\x33\xb4\x1a\xcd\xa3\xf6\xde\x31" + "\x8e\xc0\x0e\x6c\xd8\x5a\x97\x5e" + "\xdd\xfd\x60\x69\x38\x46\x3f\x90" + "\x5e\x97\xd3\x32\x76\xc7\x82\x49" + "\xfe\xba\x06\x5f\x2f\xa2\xfd\xff" + "\x80\x05\x40\xe4\x33\x03\xfb\x10" + "\xc0\xde\x65\x8c\xc9\x8d\x3a\x9d" + "\xb5\x7b\x36\x4b\xb5\x0c\xcf\x00" + "\x9c\x87\xe4\x49\xad\x90\xda\x4a" + "\xdd\xbd\xff\xe2\x32\x57\xd6\x78" + "\x36\x39\x6c\xd3\x5b\x9b\x88\x59" + "\x2d\xf0\x46\xe4\x13\x0e\x2b\x35" + "\x0d\x0f\x73\x8a\x4f\x26\x84\x75" + "\x88\x3c\xc5\x58\x66\x18\x1a\xb4" + "\x64\x51\x34\x27\x1b\xa4\x11\xc9" + "\x6d\x91\x8a\xfa\x32\x60\x9d\xd7" + "\x87\xe5\xaa\x43\x72\xf8\xda\xd1" + "\x48\x44\x13\x61\xdc\x8c\x76\x17" + "\x0c\x85\x4e\xf3\xdd\xa2\x42\xd2" + "\x74\xc1\x30\x1b\xeb\x35\x31\x29" + "\x5b\xd7\x4c\x94\x46\x35\xa1\x23" + "\x50\xf2\xa2\x8e\x7e\x4f\x23\x4f" + "\x51\xff\xe2\xc9\xa3\x7d\x56\x8b" + "\x41\xf2\xd0\xc5\x57\x7e\x59\xac" + "\xbb\x65\xf3\xfe\xf7\x17\xef\x63" + "\x7c\x6f\x23\xdd\x22\x8e\xed\x84" + "\x0e\x3b\x09\xb3\xf3\xf4\x8f\xcd" + "\x37\xa8\xe1\xa7\x30\xdb\xb1\xa2" + "\x9c\xa2\xdf\x34\x17\x3e\x68\x44" + "\xd0\xde\x03\x50\xd1\x48\x6b\x20" + "\xe2\x63\x45\xa5\xea\x87\xc2\x42" + "\x95\x03\x49\x05\xed\xe0\x90\x29" + "\x1a\xb8\xcf\x9b\x43\xcf\x29\x7a" + "\x63\x17\x41\x9f\xe0\xc9\x10\xfd" + "\x2c\x56\x8c\x08\x55\xb4\xa9\x27" + "\x0f\x23\xb1\x05\x6a\x12\x46\xc7" + "\xe1\xfe\x28\x93\x93\xd7\x2f\xdc" + "\x98\x30\xdb\x75\x8a\xbe\x97\x7a" + "\x02\xfb\x8c\xba\xbe\x25\x09\xbe" + "\xce\xcb\xa2\xef\x79\x4d\x0e\x9d" + "\x1b\x9d\xb6\x39\x34\x38\xfa\x07" + "\xec\xe8\xfc\x32\x85\x1d\xf7\x85" + "\x63\xc3\x3c\xc0\x02\x75\xd7\x3f" + "\xb2\x68\x60\x66\x65\x81\xc6\xb1" + "\x42\x65\x4b\x4b\x28\xd7\xc7\xaa" + "\x9b\xd2\xdc\x1b\x01\xe0\x26\x39" + "\x01\xc1\x52\x14\xd1\x3f\xb7\xe6" + "\x61\x41\xc7\x93\xd2\xa2\x67\xc6" + "\xf7\x11\xb5\xf5\xea\xdd\x19\xfb" + "\x4d\x21\x12\xd6\x7d\xf1\x10\xb0" + "\x89\x07\xc7\x5a\x52\x73\x70\x2f" + "\x32\xef\x65\x2b\x12\xb2\xf0\xf5" + 
"\x20\xe0\x90\x59\x7e\x64\xf1\x4c" + "\x41\xb3\xa5\x91\x08\xe6\x5e\x5f" + "\x05\x56\x76\xb4\xb0\xcd\x70\x53" + "\x10\x48\x9c\xff\xc2\x69\x55\x24" + "\x87\xef\x84\xea\xfb\xa7\xbf\xa0" + "\x91\x04\xad\x4f\x8b\x57\x54\x4b" + "\xb6\xe9\xd1\xac\x37\x2f\x1d\x2e" + "\xab\xa5\xa4\xe8\xff\xfb\xd9\x39" + "\x2f\xb7\xac\xd1\xfe\x0b\x9a\x80" + "\x0f\xb6\xf4\x36\x39\x90\x51\xe3" + "\x0a\x2f\xb6\x45\x76\x89\xcd\x61" + "\xfe\x48\x5f\x75\x1d\x13\x00\x62" + "\x80\x24\x47\xe7\xbc\x37\xd7\xe3" + "\x15\xe8\x68\x22\xaf\x80\x6f\x4b" + "\xa8\x9f\x01\x10\x48\x14\xc3\x02" + "\x52\xd2\xc7\x75\x9b\x52\x6d\x30" + "\xac\x13\x85\xc8\xf7\xa3\x58\x4b" + "\x49\xf7\x1c\x45\x55\x8c\x39\x9a" + "\x99\x6d\x97\x27\x27\xe6\xab\xdd" + "\x2c\x42\x1b\x35\xdd\x9d\x73\xbb" + "\x6c\xf3\x64\xf1\xfb\xb9\xf7\xe6" + "\x4a\x3c\xc0\x92\xc0\x2e\xb7\x1a" + "\xbe\xab\xb3\x5a\xe5\xea\xb1\x48" + "\x58\x13\x53\x90\xfd\xc3\x8e\x54" + "\xf9\x18\x16\x73\xe8\xcb\x6d\x39" + "\x0e\xd7\xe0\xfe\xb6\x9f\x43\x97" + "\xe8\xd0\x85\x56\x83\x3e\x98\x68" + "\x7f\xbd\x95\xa8\x9a\x61\x21\x8f" + "\x06\x98\x34\xa6\xc8\xd6\x1d\xf3" + "\x3d\x43\xa4\x9a\x8c\xe5\xd3\x5a" + "\x32\xa2\x04\x22\xa4\x19\x1a\x46" + "\x42\x7e\x4d\xe5\xe0\xe6\x0e\xca" + "\xd5\x58\x9d\x2c\xaf\xda\x33\x5c" + "\xb0\x79\x9e\xc9\xfc\xca\xf0\x2f" + "\xa8\xb2\x77\xeb\x7a\xa2\xdd\x37" + "\x35\x83\x07\xd6\x02\x1a\xb6\x6c" + "\x24\xe2\x59\x08\x0e\xfd\x3e\x46" + "\xec\x40\x93\xf4\x00\x26\x4f\x2a" + "\xff\x47\x2f\xeb\x02\x92\x26\x5b" + "\x53\x17\xc2\x8d\x2a\xc7\xa3\x1b" + "\xcd\xbc\xa7\xe8\xd1\x76\xe3\x80" + "\x21\xca\x5d\x3b\xe4\x9c\x8f\xa9" + "\x5b\x7f\x29\x7f\x7c\xd8\xed\x6d" + "\x8c\xb2\x86\x85\xe7\x77\xf2\x85" + "\xab\x38\xa9\x9d\xc1\x4e\xc5\x64" + "\x33\x73\x8b\x59\x03\xad\x05\xdf" + "\x25\x98\x31\xde\xef\x13\xf1\x9b" + "\x3c\x91\x9d\x7b\xb1\xfa\xe6\xbf" + "\x5b\xed\xa5\x55\xe6\xea\x6c\x74" + "\xf4\xb9\xe4\x45\x64\x72\x81\xc2" + "\x4c\x28\xd4\xcd\xac\xe2\xde\xf9" + "\xeb\x5c\xeb\x61\x60\x5a\xe5\x28", + .ksize = 1088, + .plaintext = "", + .psize = 0, + .digest = 
"\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + }, { + .key = "\x29\x21\x43\xcb\xcb\x13\x07\xde" + "\xbf\x48\xdf\x8a\x7f\xa2\x84\xde" + "\x72\x23\x9d\xf5\xf0\x07\xf2\x4c" + "\x20\x3a\x93\xb9\xcd\x5d\xfe\xcb" + "\x99\x2c\x2b\x58\xc6\x50\x5f\x94" + "\x56\xc3\x7c\x0d\x02\x3f\xb8\x5e" + "\x7b\xc0\x6c\x51\x34\x76\xc0\x0e" + "\xc6\x22\xc8\x9e\x92\xa0\x21\xc9" + "\x85\x5c\x7c\xf8\xe2\x64\x47\xc9" + "\xe4\xa2\x57\x93\xf8\xa2\x69\xcd" + "\x62\x98\x99\xf4\xd7\x7b\x14\xb1" + "\xd8\x05\xff\x04\x15\xc9\xe1\x6e" + "\x9b\xe6\x50\x6b\x0b\x3f\x22\x1f" + "\x08\xde\x0c\x5b\x08\x7e\xc6\x2f" + "\x6c\xed\xd6\xb2\x15\xa4\xb3\xf9" + "\xa7\x46\x38\x2a\xea\x69\xa5\xde" + "\x02\xc3\x96\x89\x4d\x55\x3b\xed" + "\x3d\x3a\x85\x77\xbf\x97\x45\x5c" + "\x9e\x02\x69\xe2\x1b\x68\xbe\x96" + "\xfb\x64\x6f\x0f\xf6\x06\x40\x67" + "\xfa\x04\xe3\x55\xfa\xbe\xa4\x60" + "\xef\x21\x66\x97\xe6\x9d\x5c\x1f" + "\x62\x37\xaa\x31\xde\xe4\x9c\x28" + "\x95\xe0\x22\x86\xf4\x4d\xf3\x07" + "\xfd\x5f\x3a\x54\x2c\x51\x80\x71" + "\xba\x78\x69\x5b\x65\xab\x1f\x81" + "\xed\x3b\xff\x34\xa3\xfb\xbc\x73" + "\x66\x7d\x13\x7f\xdf\x6e\xe2\xe2" + "\xeb\x4f\x6c\xda\x7d\x33\x57\xd0" + "\xd3\x7c\x95\x4f\x33\x58\x21\xc7" + "\xc0\xe5\x6f\x42\x26\xc6\x1f\x5e" + "\x85\x1b\x98\x9a\xa2\x1e\x55\x77" + "\x23\xdf\x81\x5e\x79\x55\x05\xfc" + "\xfb\xda\xee\xba\x5a\xba\xf7\x77" + "\x7f\x0e\xd3\xe1\x37\xfe\x8d\x2b" + "\xd5\x3f\xfb\xd0\xc0\x3c\x0b\x3f" + "\xcf\x3c\x14\xcf\xfb\x46\x72\x4c" + "\x1f\x39\xe2\xda\x03\x71\x6d\x23" + "\xef\x93\xcd\x39\xd9\x37\x80\x4d" + "\x65\x61\xd1\x2c\x03\xa9\x47\x72" + "\x4d\x1e\x0e\x16\x33\x0f\x21\x17" + "\xec\x92\xea\x6f\x37\x22\xa4\xd8" + "\x03\x33\x9e\xd8\x03\x69\x9a\xe8" + "\xb2\x57\xaf\x78\x99\x05\x12\xab" + "\x48\x90\x80\xf0\x12\x9b\x20\x64" + "\x7a\x1d\x47\x5f\xba\x3c\xf9\xc3" + "\x0a\x0d\x8d\xa1\xf9\x1b\x82\x13" + "\x3e\x0d\xec\x0a\x83\xc0\x65\xe1" + "\xe9\x95\xff\x97\xd6\xf2\xe4\xd5" + "\x86\xc0\x1f\x29\x27\x63\xd7\xde" + "\xb7\x0a\x07\x99\x04\x2d\xa3\x89" + 
"\xa2\x43\xcf\xf3\xe1\x43\xac\x4a" + "\x06\x97\xd0\x05\x4f\x87\xfa\xf9" + "\x9b\xbf\x52\x70\xbd\xbc\x6c\xf3" + "\x03\x13\x60\x41\x28\x09\xec\xcc" + "\xb1\x1a\xec\xd6\xfb\x6f\x2a\x89" + "\x5d\x0b\x53\x9c\x59\xc1\x84\x21" + "\x33\x51\x47\x19\x31\x9c\xd4\x0a" + "\x4d\x04\xec\x50\x90\x61\xbd\xbc" + "\x7e\xc8\xd9\x6c\x98\x1d\x45\x41" + "\x17\x5e\x97\x1c\xc5\xa8\xe8\xea" + "\x46\x58\x53\xf7\x17\xd5\xad\x11" + "\xc8\x54\xf5\x7a\x33\x90\xf5\x19" + "\xba\x36\xb4\xfc\x52\xa5\x72\x3d" + "\x14\xbb\x55\xa7\xe9\xe3\x12\xf7" + "\x1c\x30\xa2\x82\x03\xbf\x53\x91" + "\x2e\x60\x41\x9f\x5b\x69\x39\xf6" + "\x4d\xc8\xf8\x46\x7a\x7f\xa4\x98" + "\x36\xff\x06\xcb\xca\xe7\x33\xf2" + "\xc0\x4a\xf4\x3c\x14\x44\x5f\x6b" + "\x75\xef\x02\x36\x75\x08\x14\xfd" + "\x10\x8e\xa5\x58\xd0\x30\x46\x49" + "\xaf\x3a\xf8\x40\x3d\x35\xdb\x84" + "\x11\x2e\x97\x6a\xb7\x87\x7f\xad" + "\xf1\xfa\xa5\x63\x60\xd8\x5e\xbf" + "\x41\x78\x49\xcf\x77\xbb\x56\xbb" + "\x7d\x01\x67\x05\x22\xc8\x8f\x41" + "\xba\x81\xd2\xca\x2c\x38\xac\x76" + "\x06\xc1\x1a\xc2\xce\xac\x90\x67" + "\x57\x3e\x20\x12\x5b\xd9\x97\x58" + "\x65\x05\xb7\x04\x61\x7e\xd8\x3a" + "\xbf\x55\x3b\x13\xe9\x34\x5a\x37" + "\x36\xcb\x94\x45\xc5\x32\xb3\xa0" + "\x0c\x3e\x49\xc5\xd3\xed\xa7\xf0" + "\x1c\x69\xcc\xea\xcc\x83\xc9\x16" + "\x95\x72\x4b\xf4\x89\xd5\xb9\x10" + "\xf6\x2d\x60\x15\xea\x3c\x06\x66" + "\x9f\x82\xad\x17\xce\xd2\xa4\x48" + "\x7c\x65\xd9\xf8\x02\x4d\x9b\x4c" + "\x89\x06\x3a\x34\x85\x48\x89\x86" + "\xf9\x24\xa9\x54\x72\xdb\x44\x95" + "\xc7\x44\x1c\x19\x11\x4c\x04\xdc" + "\x13\xb9\x67\xc8\xc3\x3a\x6a\x50" + "\xfa\xd1\xfb\xe1\x88\xb6\xf1\xa3" + "\xc5\x3b\xdc\x38\x45\x16\x26\x02" + "\x3b\xb8\x8f\x8b\x58\x7d\x23\x04" + "\x50\x6b\x81\x9f\xae\x66\xac\x6f" + "\xcf\x2a\x9d\xf1\xfd\x1d\x57\x07" + "\xbe\x58\xeb\x77\x0c\xe3\xc2\x19" + "\x14\x74\x1b\x51\x1c\x4f\x41\xf3" + "\x32\x89\xb3\xe7\xde\x62\xf6\x5f" + "\xc7\x6a\x4a\x2a\x5b\x0f\x5f\x87" + "\x9c\x08\xb9\x02\x88\xc8\x29\xb7" + "\x94\x52\xfa\x52\xfe\xaa\x50\x10" + "\xba\x48\x75\x5e\x11\x1b\xe6\x39" + 
"\xd7\x82\x2c\x87\xf1\x1e\xa4\x38" + "\x72\x3e\x51\xe7\xd8\x3e\x5b\x7b" + "\x31\x16\x89\xba\xd6\xad\x18\x5e" + "\xba\xf8\x12\xb3\xf4\x6c\x47\x30" + "\xc0\x38\x58\xb3\x10\x8d\x58\x5d" + "\xb4\xfb\x19\x7e\x41\xc3\x66\xb8" + "\xd6\x72\x84\xe1\x1a\xc2\x71\x4c" + "\x0d\x4a\x21\x7a\xab\xa2\xc0\x36" + "\x15\xc5\xe9\x46\xd7\x29\x17\x76" + "\x5e\x47\x36\x7f\x72\x05\xa7\xcc" + "\x36\x63\xf9\x47\x7d\xe6\x07\x3c" + "\x8b\x79\x1d\x96\x61\x8d\x90\x65" + "\x7c\xf5\xeb\x4e\x6e\x09\x59\x6d" + "\x62\x50\x1b\x0f\xe0\xdc\x78\xf2" + "\x5b\x83\x1a\xa1\x11\x75\xfd\x18" + "\xd7\xe2\x8d\x65\x14\x21\xce\xbe" + "\xb5\x87\xe3\x0a\xda\x24\x0a\x64" + "\xa9\x9f\x03\x8d\x46\x5d\x24\x1a" + "\x8a\x0c\x42\x01\xca\xb1\x5f\x7c" + "\xa5\xac\x32\x4a\xb8\x07\x91\x18" + "\x6f\xb0\x71\x3c\xc9\xb1\xa8\xf8" + "\x5f\x69\xa5\xa1\xca\x9e\x7a\xaa" + "\xac\xe9\xc7\x47\x41\x75\x25\xc3" + "\x73\xe2\x0b\xdd\x6d\x52\x71\xbe" + "\xc5\xdc\xb4\xe7\x01\x26\x53\x77" + "\x86\x90\x85\x68\x6b\x7b\x03\x53" + "\xda\x52\x52\x51\x68\xc8\xf3\xec" + "\x6c\xd5\x03\x7a\xa3\x0e\xb4\x02" + "\x5f\x1a\xab\xee\xca\x67\x29\x7b" + "\xbd\x96\x59\xb3\x8b\x32\x7a\x92" + "\x9f\xd8\x25\x2b\xdf\xc0\x4c\xda", + .ksize = 1088, + .plaintext = "\xbc\xda\x81\xa8\x78\x79\x1c\xbf" + "\x77\x53\xba\x4c\x30\x5b\xb8\x33", + .psize = 16, + .digest = "\x04\xbf\x7f\x6a\xce\x72\xea\x6a" + "\x79\xdb\xb0\xc9\x60\xf6\x12\xcc", + .np = 6, + .tap = { 4, 4, 1, 1, 1, 5 }, + }, { + .key = "\x65\x4d\xe3\xf8\xd2\x4c\xac\x28" + "\x68\xf5\xb3\x81\x71\x4b\xa1\xfa" + "\x04\x0e\xd3\x81\x36\xbe\x0c\x81" + "\x5e\xaf\xbc\x3a\xa4\xc0\x8e\x8b" + "\x55\x63\xd3\x52\x97\x88\xd6\x19" + "\xbc\x96\xdf\x49\xff\x04\x63\xf5" + "\x0c\x11\x13\xaa\x9e\x1f\x5a\xf7" + "\xdd\xbd\x37\x80\xc3\xd0\xbe\xa7" + "\x05\xc8\x3c\x98\x1e\x05\x3c\x84" + "\x39\x61\xc4\xed\xed\x71\x1b\xc4" + "\x74\x45\x2c\xa1\x56\x70\x97\xfd" + "\x44\x18\x07\x7d\xca\x60\x1f\x73" + "\x3b\x6d\x21\xcb\x61\x87\x70\x25" + "\x46\x21\xf1\x1f\x21\x91\x31\x2d" + "\x5d\xcc\xb7\xd1\x84\x3e\x3d\xdb" + "\x03\x53\x2a\x82\xa6\x9a\x95\xbc" 
+ "\x1a\x1e\x0a\x5e\x07\x43\xab\x43" + "\xaf\x92\x82\x06\x91\x04\x09\xf4" + "\x17\x0a\x9a\x2c\x54\xdb\xb8\xf4" + "\xd0\xf0\x10\x66\x24\x8d\xcd\xda" + "\xfe\x0e\x45\x9d\x6f\xc4\x4e\xf4" + "\x96\xaf\x13\xdc\xa9\xd4\x8c\xc4" + "\xc8\x57\x39\x3c\xc2\xd3\x0a\x76" + "\x4a\x1f\x75\x83\x44\xc7\xd1\x39" + "\xd8\xb5\x41\xba\x73\x87\xfa\x96" + "\xc7\x18\x53\xfb\x9b\xda\xa0\x97" + "\x1d\xee\x60\x85\x9e\x14\xc3\xce" + "\xc4\x05\x29\x3b\x95\x30\xa3\xd1" + "\x9f\x82\x6a\x04\xf5\xa7\x75\x57" + "\x82\x04\xfe\x71\x51\x71\xb1\x49" + "\x50\xf8\xe0\x96\xf1\xfa\xa8\x88" + "\x3f\xa0\x86\x20\xd4\x60\x79\x59" + "\x17\x2d\xd1\x09\xf4\xec\x05\x57" + "\xcf\x62\x7e\x0e\x7e\x60\x78\xe6" + "\x08\x60\x29\xd8\xd5\x08\x1a\x24" + "\xc4\x6c\x24\xe7\x92\x08\x3d\x8a" + "\x98\x7a\xcf\x99\x0a\x65\x0e\xdc" + "\x8c\x8a\xbe\x92\x82\x91\xcc\x62" + "\x30\xb6\xf4\x3f\xc6\x8a\x7f\x12" + "\x4a\x8a\x49\xfa\x3f\x5c\xd4\x5a" + "\xa6\x82\xa3\xe6\xaa\x34\x76\xb2" + "\xab\x0a\x30\xef\x6c\x77\x58\x3f" + "\x05\x6b\xcc\x5c\xae\xdc\xd7\xb9" + "\x51\x7e\x8d\x32\x5b\x24\x25\xbe" + "\x2b\x24\x01\xcf\x80\xda\x16\xd8" + "\x90\x72\x2c\xad\x34\x8d\x0c\x74" + "\x02\xcb\xfd\xcf\x6e\xef\x97\xb5" + "\x4c\xf2\x68\xca\xde\x43\x9e\x8a" + "\xc5\x5f\x31\x7f\x14\x71\x38\xec" + "\xbd\x98\xe5\x71\xc4\xb5\xdb\xef" + "\x59\xd2\xca\xc0\xc1\x86\x75\x01" + "\xd4\x15\x0d\x6f\xa4\xf7\x7b\x37" + "\x47\xda\x18\x93\x63\xda\xbe\x9e" + "\x07\xfb\xb2\x83\xd5\xc4\x34\x55" + "\xee\x73\xa1\x42\x96\xf9\x66\x41" + "\xa4\xcc\xd2\x93\x6e\xe1\x0a\xbb" + "\xd2\xdd\x18\x23\xe6\x6b\x98\x0b" + "\x8a\x83\x59\x2c\xc3\xa6\x59\x5b" + "\x01\x22\x59\xf7\xdc\xb0\x87\x7e" + "\xdb\x7d\xf4\x71\x41\xab\xbd\xee" + "\x79\xbe\x3c\x01\x76\x0b\x2d\x0a" + "\x42\xc9\x77\x8c\xbb\x54\x95\x60" + "\x43\x2e\xe0\x17\x52\xbd\x90\xc9" + "\xc2\x2c\xdd\x90\x24\x22\x76\x40" + "\x5c\xb9\x41\xc9\xa1\xd5\xbd\xe3" + "\x44\xe0\xa4\xab\xcc\xb8\xe2\x32" + "\x02\x15\x04\x1f\x8c\xec\x5d\x14" + "\xac\x18\xaa\xef\x6e\x33\x19\x6e" + "\xde\xfe\x19\xdb\xeb\x61\xca\x18" + "\xad\xd8\x3d\xbf\x09\x11\xc7\xa5" + 
"\x86\x0b\x0f\xe5\x3e\xde\xe8\xd9" + "\x0a\x69\x9e\x4c\x20\xff\xf9\xc5" + "\xfa\xf8\xf3\x7f\xa5\x01\x4b\x5e" + "\x0f\xf0\x3b\x68\xf0\x46\x8c\x2a" + "\x7a\xc1\x8f\xa0\xfe\x6a\x5b\x44" + "\x70\x5c\xcc\x92\x2c\x6f\x0f\xbd" + "\x25\x3e\xb7\x8e\x73\x58\xda\xc9" + "\xa5\xaa\x9e\xf3\x9b\xfd\x37\x3e" + "\xe2\x88\xa4\x7b\xc8\x5c\xa8\x93" + "\x0e\xe7\x9a\x9c\x2e\x95\x18\x9f" + "\xc8\x45\x0c\x88\x9e\x53\x4f\x3a" + "\x76\xc1\x35\xfa\x17\xd8\xac\xa0" + "\x0c\x2d\x47\x2e\x4f\x69\x9b\xf7" + "\xd0\xb6\x96\x0c\x19\xb3\x08\x01" + "\x65\x7a\x1f\xc7\x31\x86\xdb\xc8" + "\xc1\x99\x8f\xf8\x08\x4a\x9d\x23" + "\x22\xa8\xcf\x27\x01\x01\x88\x93" + "\x9c\x86\x45\xbd\xe0\x51\xca\x52" + "\x84\xba\xfe\x03\xf7\xda\xc5\xce" + "\x3e\x77\x75\x86\xaf\x84\xc8\x05" + "\x44\x01\x0f\x02\xf3\x58\xb0\x06" + "\x5a\xd7\x12\x30\x8d\xdf\x1f\x1f" + "\x0a\xe6\xd2\xea\xf6\x3a\x7a\x99" + "\x63\xe8\xd2\xc1\x4a\x45\x8b\x40" + "\x4d\x0a\xa9\x76\x92\xb3\xda\x87" + "\x36\x33\xf0\x78\xc3\x2f\x5f\x02" + "\x1a\x6a\x2c\x32\xcd\x76\xbf\xbd" + "\x5a\x26\x20\x28\x8c\x8c\xbc\x52" + "\x3d\x0a\xc9\xcb\xab\xa4\x21\xb0" + "\x54\x40\x81\x44\xc7\xd6\x1c\x11" + "\x44\xc6\x02\x92\x14\x5a\xbf\x1a" + "\x09\x8a\x18\xad\xcd\x64\x3d\x53" + "\x4a\xb6\xa5\x1b\x57\x0e\xef\xe0" + "\x8c\x44\x5f\x7d\xbd\x6c\xfd\x60" + "\xae\x02\x24\xb6\x99\xdd\x8c\xaf" + "\x59\x39\x75\x3c\xd1\x54\x7b\x86" + "\xcc\x99\xd9\x28\x0c\xb0\x94\x62" + "\xf9\x51\xd1\x19\x96\x2d\x66\xf5" + "\x55\xcf\x9e\x59\xe2\x6b\x2c\x08" + "\xc0\x54\x48\x24\x45\xc3\x8c\x73" + "\xea\x27\x6e\x66\x7d\x1d\x0e\x6e" + "\x13\xe8\x56\x65\x3a\xb0\x81\x5c" + "\xf0\xe8\xd8\x00\x6b\xcd\x8f\xad" + "\xdd\x53\xf3\xa4\x6c\x43\xd6\x31" + "\xaf\xd2\x76\x1e\x91\x12\xdb\x3c" + "\x8c\xc2\x81\xf0\x49\xdb\xe2\x6b" + "\x76\x62\x0a\x04\xe4\xaa\x8a\x7c" + "\x08\x0b\x5d\xd0\xee\x1d\xfb\xc4" + "\x02\x75\x42\xd6\xba\xa7\x22\xa8" + "\x47\x29\xb7\x85\x6d\x93\x3a\xdb" + "\x00\x53\x0b\xa2\xeb\xf8\xfe\x01" + "\x6f\x8a\x31\xd6\x17\x05\x6f\x67" + "\x88\x95\x32\xfe\x4f\xa6\x4b\xf8" + "\x03\xe4\xcd\x9a\x18\xe8\x4e\x2d" + 
"\xf7\x97\x9a\x0c\x7d\x9f\x7e\x44" + "\x69\x51\xe0\x32\x6b\x62\x86\x8f" + "\xa6\x8e\x0b\x21\x96\xe5\xaf\x77" + "\xc0\x83\xdf\xa5\x0e\xd0\xa1\x04" + "\xaf\xc1\x10\xcb\x5a\x40\xe4\xe3" + "\x38\x7e\x07\xe8\x4d\xfa\xed\xc5" + "\xf0\x37\xdf\xbb\x8a\xcf\x3d\xdc" + "\x61\xd2\xc6\x2b\xff\x07\xc9\x2f" + "\x0c\x2d\x5c\x07\xa8\x35\x6a\xfc" + "\xae\x09\x03\x45\x74\x51\x4d\xc4" + "\xb8\x23\x87\x4a\x99\x27\x20\x87" + "\x62\x44\x0a\x4a\xce\x78\x47\x22", + .ksize = 1088, + .plaintext = "\x8e\xb0\x4c\xde\x9c\x4a\x04\x5a" + "\xf6\xa9\x7f\x45\x25\xa5\x7b\x3a" + "\xbc\x4d\x73\x39\x81\xb5\xbd\x3d" + "\x21\x6f\xd7\x37\x50\x3c\x7b\x28" + "\xd1\x03\x3a\x17\xed\x7b\x7c\x2a" + "\x16\xbc\xdf\x19\x89\x52\x71\x31" + "\xb6\xc0\xfd\xb5\xd3\xba\x96\x99" + "\xb6\x34\x0b\xd0\x99\x93\xfc\x1a" + "\x01\x3c\x85\xc6\x9b\x78\x5c\x8b" + "\xfe\xae\xd2\xbf\xb2\x6f\xf9\xed" + "\xc8\x25\x17\xfe\x10\x3b\x7d\xda" + "\xf4\x8d\x35\x4b\x7c\x7b\x82\xe7" + "\xc2\xb3\xee\x60\x4a\x03\x86\xc9" + "\x4e\xb5\xc4\xbe\xd2\xbd\x66\xf1" + "\x13\xf1\x09\xab\x5d\xca\x63\x1f" + "\xfc\xfb\x57\x2a\xfc\xca\x66\xd8" + "\x77\x84\x38\x23\x1d\xac\xd3\xb3" + "\x7a\xad\x4c\x70\xfa\x9c\xc9\x61" + "\xa6\x1b\xba\x33\x4b\x4e\x33\xec" + "\xa0\xa1\x64\x39\x40\x05\x1c\xc2" + "\x3f\x49\x9d\xae\xf2\xc5\xf2\xc5" + "\xfe\xe8\xf4\xc2\xf9\x96\x2d\x28" + "\x92\x30\x44\xbc\xd2\x7f\xe1\x6e" + "\x62\x02\x8f\x3d\x1c\x80\xda\x0e" + "\x6a\x90\x7e\x75\xff\xec\x3e\xc4" + "\xcd\x16\x34\x3b\x05\x6d\x4d\x20" + "\x1c\x7b\xf5\x57\x4f\xfa\x3d\xac" + "\xd0\x13\x55\xe8\xb3\xe1\x1b\x78" + "\x30\xe6\x9f\x84\xd4\x69\xd1\x08" + "\x12\x77\xa7\x4a\xbd\xc0\xf2\xd2" + "\x78\xdd\xa3\x81\x12\xcb\x6c\x14" + "\x90\x61\xe2\x84\xc6\x2b\x16\xcc" + "\x40\x99\x50\x88\x01\x09\x64\x4f" + "\x0a\x80\xbe\x61\xae\x46\xc9\x0a" + "\x5d\xe0\xfb\x72\x7a\x1a\xdd\x61" + "\x63\x20\x05\xa0\x4a\xf0\x60\x69" + "\x7f\x92\xbc\xbf\x4e\x39\x4d\xdd" + "\x74\xd1\xb7\xc0\x5a\x34\xb7\xae" + "\x76\x65\x2e\xbc\x36\xb9\x04\x95" + "\x42\xe9\x6f\xca\x78\xb3\x72\x07" + "\xa3\xba\x02\x94\x67\x4c\xb1\xd7" + 
"\xe9\x30\x0d\xf0\x3b\xb8\x10\x6d" + "\xea\x2b\x21\xbf\x74\x59\x82\x97" + "\x85\xaa\xf1\xd7\x54\x39\xeb\x05" + "\xbd\xf3\x40\xa0\x97\xe6\x74\xfe" + "\xb4\x82\x5b\xb1\x36\xcb\xe8\x0d" + "\xce\x14\xd9\xdf\xf1\x94\x22\xcd" + "\xd6\x00\xba\x04\x4c\x05\x0c\xc0" + "\xd1\x5a\xeb\x52\xd5\xa8\x8e\xc8" + "\x97\xa1\xaa\xc1\xea\xc1\xbe\x7c" + "\x36\xb3\x36\xa0\xc6\x76\x66\xc5" + "\xe2\xaf\xd6\x5c\xe2\xdb\x2c\xb3" + "\x6c\xb9\x99\x7f\xff\x9f\x03\x24" + "\xe1\x51\x44\x66\xd8\x0c\x5d\x7f" + "\x5c\x85\x22\x2a\xcf\x6d\x79\x28" + "\xab\x98\x01\x72\xfe\x80\x87\x5f" + "\x46\xba\xef\x81\x24\xee\xbf\xb0" + "\x24\x74\xa3\x65\x97\x12\xc4\xaf" + "\x8b\xa0\x39\xda\x8a\x7e\x74\x6e" + "\x1b\x42\xb4\x44\x37\xfc\x59\xfd" + "\x86\xed\xfb\x8c\x66\x33\xda\x63" + "\x75\xeb\xe1\xa4\x85\x4f\x50\x8f" + "\x83\x66\x0d\xd3\x37\xfa\xe6\x9c" + "\x4f\x30\x87\x35\x18\xe3\x0b\xb7" + "\x6e\x64\x54\xcd\x70\xb3\xde\x54" + "\xb7\x1d\xe6\x4c\x4d\x55\x12\x12" + "\xaf\x5f\x7f\x5e\xee\x9d\xe8\x8e" + "\x32\x9d\x4e\x75\xeb\xc6\xdd\xaa" + "\x48\x82\xa4\x3f\x3c\xd7\xd3\xa8" + "\x63\x9e\x64\xfe\xe3\x97\x00\x62" + "\xe5\x40\x5d\xc3\xad\x72\xe1\x28" + "\x18\x50\xb7\x75\xef\xcd\x23\xbf" + "\x3f\xc0\x51\x36\xf8\x41\xc3\x08" + "\xcb\xf1\x8d\x38\x34\xbd\x48\x45" + "\x75\xed\xbc\x65\x7b\xb5\x0c\x9b" + "\xd7\x67\x7d\x27\xb4\xc4\x80\xd7" + "\xa9\xb9\xc7\x4a\x97\xaa\xda\xc8" + "\x3c\x74\xcf\x36\x8f\xe4\x41\xe3" + "\xd4\xd3\x26\xa7\xf3\x23\x9d\x8f" + "\x6c\x20\x05\x32\x3e\xe0\xc3\xc8" + "\x56\x3f\xa7\x09\xb7\xfb\xc7\xf7" + "\xbe\x2a\xdd\x0f\x06\x7b\x0d\xdd" + "\xb0\xb4\x86\x17\xfd\xb9\x04\xe5" + "\xc0\x64\x5d\xad\x2a\x36\x38\xdb" + "\x24\xaf\x5b\xff\xca\xf9\x41\xe8" + "\xf9\x2f\x1e\x5e\xf9\xf5\xd5\xf2" + "\xb2\x88\xca\xc9\xa1\x31\xe2\xe8" + "\x10\x95\x65\xbf\xf1\x11\x61\x7a" + "\x30\x1a\x54\x90\xea\xd2\x30\xf6" + "\xa5\xad\x60\xf9\x4d\x84\x21\x1b" + "\xe4\x42\x22\xc8\x12\x4b\xb0\x58" + "\x3e\x9c\x2d\x32\x95\x0a\x8e\xb0" + "\x0a\x7e\x77\x2f\xe8\x97\x31\x6a" + "\xf5\x59\xb4\x26\xe6\x37\x12\xc9" + "\xcb\xa0\x58\x33\x6f\xd5\x55\x55" + 
"\x3c\xa1\x33\xb1\x0b\x7e\x2e\xb4" + "\x43\x2a\x84\x39\xf0\x9c\xf4\x69" + "\x4f\x1e\x79\xa6\x15\x1b\x87\xbb" + "\xdb\x9b\xe0\xf1\x0b\xba\xe3\x6e" + "\xcc\x2f\x49\x19\x22\x29\xfc\x71" + "\xbb\x77\x38\x18\x61\xaf\x85\x76" + "\xeb\xd1\x09\xcc\x86\x04\x20\x9a" + "\x66\x53\x2f\x44\x8b\xc6\xa3\xd2" + "\x5f\xc7\x79\x82\x66\xa8\x6e\x75" + "\x7d\x94\xd1\x86\x75\x0f\xa5\x4f" + "\x3c\x7a\x33\xce\xd1\x6e\x9d\x7b" + "\x1f\x91\x37\xb8\x37\x80\xfb\xe0" + "\x52\x26\xd0\x9a\xd4\x48\x02\x41" + "\x05\xe3\x5a\x94\xf1\x65\x61\x19" + "\xb8\x88\x4e\x2b\xea\xba\x8b\x58" + "\x8b\x42\x01\x00\xa8\xfe\x00\x5c" + "\xfe\x1c\xee\x31\x15\x69\xfa\xb3" + "\x9b\x5f\x22\x8e\x0d\x2c\xe3\xa5" + "\x21\xb9\x99\x8a\x8e\x94\x5a\xef" + "\x13\x3e\x99\x96\x79\x6e\xd5\x42" + "\x36\x03\xa9\xe2\xca\x65\x4e\x8a" + "\x8a\x30\xd2\x7d\x74\xe7\xf0\xaa" + "\x23\x26\xdd\xcb\x82\x39\xfc\x9d" + "\x51\x76\x21\x80\xa2\xbe\x93\x03" + "\x47\xb0\xc1\xb6\xdc\x63\xfd\x9f" + "\xca\x9d\xa5\xca\x27\x85\xe2\xd8" + "\x15\x5b\x7e\x14\x7a\xc4\x89\xcc" + "\x74\x14\x4b\x46\xd2\xce\xac\x39" + "\x6b\x6a\x5a\xa4\x0e\xe3\x7b\x15" + "\x94\x4b\x0f\x74\xcb\x0c\x7f\xa9" + "\xbe\x09\x39\xa3\xdd\x56\x5c\xc7" + "\x99\x56\x65\x39\xf4\x0b\x7d\x87" + "\xec\xaa\xe3\x4d\x22\x65\x39\x4e", + .psize = 1024, + .digest = "\x64\x3a\xbc\xc3\x3f\x74\x40\x51" + "\x6e\x56\x01\x1a\x51\xec\x36\xde", + .np = 8, + .tap = { 64, 203, 267, 28, 263, 62, 54, 83 }, + }, { + .key = "\x1b\x82\x2e\x1b\x17\x23\xb9\x6d" + "\xdc\x9c\xda\x99\x07\xe3\x5f\xd8" + "\xd2\xf8\x43\x80\x8d\x86\x7d\x80" + "\x1a\xd0\xcc\x13\xb9\x11\x05\x3f" + "\x7e\xcf\x7e\x80\x0e\xd8\x25\x48" + "\x8b\xaa\x63\x83\x92\xd0\x72\xf5" + "\x4f\x67\x7e\x50\x18\x25\xa4\xd1" + "\xe0\x7e\x1e\xba\xd8\xa7\x6e\xdb" + "\x1a\xcc\x0d\xfe\x9f\x6d\x22\x35" + "\xe1\xe6\xe0\xa8\x7b\x9c\xb1\x66" + "\xa3\xf8\xff\x4d\x90\x84\x28\xbc" + "\xdc\x19\xc7\x91\x49\xfc\xf6\x33" + "\xc9\x6e\x65\x7f\x28\x6f\x68\x2e" + "\xdf\x1a\x75\xe9\xc2\x0c\x96\xb9" + "\x31\x22\xc4\x07\xc6\x0a\x2f\xfd" + "\x36\x06\x5f\x5c\xc5\xb1\x3a\xf4" + 
"\x5e\x48\xa4\x45\x2b\x88\xa7\xee" + "\xa9\x8b\x52\xcc\x99\xd9\x2f\xb8" + "\xa4\x58\x0a\x13\xeb\x71\x5a\xfa" + "\xe5\x5e\xbe\xf2\x64\xad\x75\xbc" + "\x0b\x5b\x34\x13\x3b\x23\x13\x9a" + "\x69\x30\x1e\x9a\xb8\x03\xb8\x8b" + "\x3e\x46\x18\x6d\x38\xd9\xb3\xd8" + "\xbf\xf1\xd0\x28\xe6\x51\x57\x80" + "\x5e\x99\xfb\xd0\xce\x1e\x83\xf7" + "\xe9\x07\x5a\x63\xa9\xef\xce\xa5" + "\xfb\x3f\x37\x17\xfc\x0b\x37\x0e" + "\xbb\x4b\x21\x62\xb7\x83\x0e\xa9" + "\x9e\xb0\xc4\xad\x47\xbe\x35\xe7" + "\x51\xb2\xf2\xac\x2b\x65\x7b\x48" + "\xe3\x3f\x5f\xb6\x09\x04\x0c\x58" + "\xce\x99\xa9\x15\x2f\x4e\xc1\xf2" + "\x24\x48\xc0\xd8\x6c\xd3\x76\x17" + "\x83\x5d\xe6\xe3\xfd\x01\x8e\xf7" + "\x42\xa5\x04\x29\x30\xdf\xf9\x00" + "\x4a\xdc\x71\x22\x1a\x33\x15\xb6" + "\xd7\x72\xfb\x9a\xb8\xeb\x2b\x38" + "\xea\xa8\x61\xa8\x90\x11\x9d\x73" + "\x2e\x6c\xce\x81\x54\x5a\x9f\xcd" + "\xcf\xd5\xbd\x26\x5d\x66\xdb\xfb" + "\xdc\x1e\x7c\x10\xfe\x58\x82\x10" + "\x16\x24\x01\xce\x67\x55\x51\xd1" + "\xdd\x6b\x44\xa3\x20\x8e\xa9\xa6" + "\x06\xa8\x29\x77\x6e\x00\x38\x5b" + "\xde\x4d\x58\xd8\x1f\x34\xdf\xf9" + "\x2c\xac\x3e\xad\xfb\x92\x0d\x72" + "\x39\xa4\xac\x44\x10\xc0\x43\xc4" + "\xa4\x77\x3b\xfc\xc4\x0d\x37\xd3" + "\x05\x84\xda\x53\x71\xf8\x80\xd3" + "\x34\x44\xdb\x09\xb4\x2b\x8e\xe3" + "\x00\x75\x50\x9e\x43\x22\x00\x0b" + "\x7c\x70\xab\xd4\x41\xf1\x93\xcd" + "\x25\x2d\x84\x74\xb5\xf2\x92\xcd" + "\x0a\x28\xea\x9a\x49\x02\x96\xcb" + "\x85\x9e\x2f\x33\x03\x86\x1d\xdc" + "\x1d\x31\xd5\xfc\x9d\xaa\xc5\xe9" + "\x9a\xc4\x57\xf5\x35\xed\xf4\x4b" + "\x3d\x34\xc2\x29\x13\x86\x36\x42" + "\x5d\xbf\x90\x86\x13\x77\xe5\xc3" + "\x62\xb4\xfe\x0b\x70\x39\x35\x65" + "\x02\xea\xf6\xce\x57\x0c\xbb\x74" + "\x29\xe3\xfd\x60\x90\xfd\x10\x38" + "\xd5\x4e\x86\xbd\x37\x70\xf0\x97" + "\xa6\xab\x3b\x83\x64\x52\xca\x66" + "\x2f\xf9\xa4\xca\x3a\x55\x6b\xb0" + "\xe8\x3a\x34\xdb\x9e\x48\x50\x2f" + "\x3b\xef\xfd\x08\x2d\x5f\xc1\x37" + "\x5d\xbe\x73\xe4\xd8\xe9\xac\xca" + "\x8a\xaa\x48\x7c\x5c\xf4\xa6\x96" + "\x5f\xfa\x70\xa6\xb7\x8b\x50\xcb" + 
"\xa6\xf5\xa9\xbd\x7b\x75\x4c\x22" + "\x0b\x19\x40\x2e\xc9\x39\x39\x32" + "\x83\x03\xa8\xa4\x98\xe6\x8e\x16" + "\xb9\xde\x08\xc5\xfc\xbf\xad\x39" + "\xa8\xc7\x93\x6c\x6f\x23\xaf\xc1" + "\xab\xe1\xdf\xbb\x39\xae\x93\x29" + "\x0e\x7d\x80\x8d\x3e\x65\xf3\xfd" + "\x96\x06\x65\x90\xa1\x28\x64\x4b" + "\x69\xf9\xa8\x84\x27\x50\xfc\x87" + "\xf7\xbf\x55\x8e\x56\x13\x58\x7b" + "\x85\xb4\x6a\x72\x0f\x40\xf1\x4f" + "\x83\x81\x1f\x76\xde\x15\x64\x7a" + "\x7a\x80\xe4\xc7\x5e\x63\x01\x91" + "\xd7\x6b\xea\x0b\x9b\xa2\x99\x3b" + "\x6c\x88\xd8\xfd\x59\x3c\x8d\x22" + "\x86\x56\xbe\xab\xa1\x37\x08\x01" + "\x50\x85\x69\x29\xee\x9f\xdf\x21" + "\x3e\x20\x20\xf5\xb0\xbb\x6b\xd0" + "\x9c\x41\x38\xec\x54\x6f\x2d\xbd" + "\x0f\xe1\xbd\xf1\x2b\x6e\x60\x56" + "\x29\xe5\x7a\x70\x1c\xe2\xfc\x97" + "\x82\x68\x67\xd9\x3d\x1f\xfb\xd8" + "\x07\x9f\xbf\x96\x74\xba\x6a\x0e" + "\x10\x48\x20\xd8\x13\x1e\xb5\x44" + "\xf2\xcc\xb1\x8b\xfb\xbb\xec\xd7" + "\x37\x70\x1f\x7c\x55\xd2\x4b\xb9" + "\xfd\x70\x5e\xa3\x91\x73\x63\x52" + "\x13\x47\x5a\x06\xfb\x01\x67\xa5" + "\xc0\xd0\x49\x19\x56\x66\x9a\x77" + "\x64\xaf\x8c\x25\x91\x52\x87\x0e" + "\x18\xf3\x5f\x97\xfd\x71\x13\xf8" + "\x05\xa5\x39\xcc\x65\xd3\xcc\x63" + "\x5b\xdb\x5f\x7e\x5f\x6e\xad\xc4" + "\xf4\xa0\xc5\xc2\x2b\x4d\x97\x38" + "\x4f\xbc\xfa\x33\x17\xb4\x47\xb9" + "\x43\x24\x15\x8d\xd2\xed\x80\x68" + "\x84\xdb\x04\x80\xca\x5e\x6a\x35" + "\x2c\x2c\xe7\xc5\x03\x5f\x54\xb0" + "\x5e\x4f\x1d\x40\x54\x3d\x78\x9a" + "\xac\xda\x80\x27\x4d\x15\x4c\x1a" + "\x6e\x80\xc9\xc4\x3b\x84\x0e\xd9" + "\x2e\x93\x01\x8c\xc3\xc8\x91\x4b" + "\xb3\xaa\x07\x04\x68\x5b\x93\xa5" + "\xe7\xc4\x9d\xe7\x07\xee\xf5\x3b" + "\x40\x89\xcc\x60\x34\x9d\xb4\x06" + "\x1b\xef\x92\xe6\xc1\x2a\x7d\x0f" + "\x81\xaa\x56\xe3\xd7\xed\xa7\xd4" + "\xa7\x3a\x49\xc4\xad\x81\x5c\x83" + "\x55\x8e\x91\x54\xb7\x7d\x65\xa5" + "\x06\x16\xd5\x9a\x16\xc1\xb0\xa2" + "\x06\xd8\x98\x47\x73\x7e\x73\xa0" + "\xb8\x23\xb1\x52\xbf\x68\x74\x5d" + "\x0b\xcb\xfa\x8c\x46\xe3\x24\xe6" + "\xab\xd4\x69\x8d\x8c\xf2\x8a\x59" + 
"\xbe\x48\x46\x50\x8c\x9a\xe8\xe3" + "\x31\x55\x0a\x06\xed\x4f\xf8\xb7" + "\x4f\xe3\x85\x17\x30\xbd\xd5\x20" + "\xe7\x5b\xb2\x32\xcf\x6b\x16\x44" + "\xd2\xf5\x7e\xd7\xd1\x2f\xee\x64" + "\x3e\x9d\x10\xef\x27\x35\x43\x64" + "\x67\xfb\x7a\x7b\xe0\x62\x31\x9a" + "\x4d\xdf\xa5\xab\xc0\x20\xbb\x01" + "\xe9\x7b\x54\xf1\xde\xb2\x79\x50" + "\x6c\x4b\x91\xdb\x7f\xbb\x50\xc1" + "\x55\x44\x38\x9a\xe0\x9f\xe8\x29" + "\x6f\x15\xf8\x4e\xa6\xec\xa0\x60", + .ksize = 1088, + .plaintext = "\x15\x68\x9e\x2f\xad\x15\x52\xdf" + "\xf0\x42\x62\x24\x2a\x2d\xea\xbf" + "\xc7\xf3\xb4\x1a\xf5\xed\xb2\x08" + "\x15\x60\x1c\x00\x77\xbf\x0b\x0e" + "\xb7\x2c\xcf\x32\x3a\xc7\x01\x77" + "\xef\xa6\x75\xd0\x29\xc7\x68\x20" + "\xb2\x92\x25\xbf\x12\x34\xe9\xa4" + "\xfd\x32\x7b\x3f\x7c\xbd\xa5\x02" + "\x38\x41\xde\xc9\xc1\x09\xd9\xfc" + "\x6e\x78\x22\x83\x18\xf7\x50\x8d" + "\x8f\x9c\x2d\x02\xa5\x30\xac\xff" + "\xea\x63\x2e\x80\x37\x83\xb0\x58" + "\xda\x2f\xef\x21\x55\xba\x7b\xb1" + "\xb6\xed\xf5\xd2\x4d\xaa\x8c\xa9" + "\xdd\xdb\x0f\xb4\xce\xc1\x9a\xb1" + "\xc1\xdc\xbd\xab\x86\xc2\xdf\x0b" + "\xe1\x2c\xf9\xbe\xf6\xd8\xda\x62" + "\x72\xdd\x98\x09\x52\xc0\xc4\xb6" + "\x7b\x17\x5c\xf5\xd8\x4b\x88\xd6" + "\x6b\xbf\x84\x4a\x3f\xf5\x4d\xd2" + "\x94\xe2\x9c\xff\xc7\x3c\xd9\xc8" + "\x37\x38\xbc\x8c\xf3\xe7\xb7\xd0" + "\x1d\x78\xc4\x39\x07\xc8\x5e\x79" + "\xb6\x5a\x90\x5b\x6e\x97\xc9\xd4" + "\x82\x9c\xf3\x83\x7a\xe7\x97\xfc" + "\x1d\xbb\xef\xdb\xce\xe0\x82\xad" + "\xca\x07\x6c\x54\x62\x6f\x81\xe6" + "\x7a\x5a\x96\x6e\x80\x3a\xa2\x37" + "\x6f\xc6\xa4\x29\xc3\x9e\x19\x94" + "\x9f\xb0\x3e\x38\xfb\x3c\x2b\x7d" + "\xaa\xb8\x74\xda\x54\x23\x51\x12" + "\x4b\x96\x36\x8f\x91\x4f\x19\x37" + "\x83\xc9\xdd\xc7\x1a\x32\x2d\xab" + "\xc7\x89\xe2\x07\x47\x6c\xe8\xa6" + "\x70\x6b\x8e\x0c\xda\x5c\x6a\x59" + "\x27\x33\x0e\xe1\xe1\x20\xe8\xc8" + "\xae\xdc\xd0\xe3\x6d\xa8\xa6\x06" + "\x41\xb4\xd4\xd4\xcf\x91\x3e\x06" + "\xb0\x9a\xf7\xf1\xaa\xa6\x23\x92" + "\x10\x86\xf0\x94\xd1\x7c\x2e\x07" + "\x30\xfb\xc5\xd8\xf3\x12\xa9\xe8" + 
"\x22\x1c\x97\x1a\xad\x96\xb0\xa1" + "\x72\x6a\x6b\xb4\xfd\xf7\xe8\xfa" + "\xe2\x74\xd8\x65\x8d\x35\x17\x4b" + "\x00\x23\x5c\x8c\x70\xad\x71\xa2" + "\xca\xc5\x6c\x59\xbf\xb4\xc0\x6d" + "\x86\x98\x3e\x19\x5a\x90\x92\xb1" + "\x66\x57\x6a\x91\x68\x7c\xbc\xf3" + "\xf1\xdb\x94\xf8\x48\xf1\x36\xd8" + "\x78\xac\x1c\xa9\xcc\xd6\x27\xba" + "\x91\x54\x22\xf5\xe6\x05\x3f\xcc" + "\xc2\x8f\x2c\x3b\x2b\xc3\x2b\x2b" + "\x3b\xb8\xb6\x29\xb7\x2f\x94\xb6" + "\x7b\xfc\x94\x3e\xd0\x7a\x41\x59" + "\x7b\x1f\x9a\x09\xa6\xed\x4a\x82" + "\x9d\x34\x1c\xbd\x4e\x1c\x3a\x66" + "\x80\x74\x0e\x9a\x4f\x55\x54\x47" + "\x16\xba\x2a\x0a\x03\x35\x99\xa3" + "\x5c\x63\x8d\xa2\x72\x8b\x17\x15" + "\x68\x39\x73\xeb\xec\xf2\xe8\xf5" + "\x95\x32\x27\xd6\xc4\xfe\xb0\x51" + "\xd5\x0c\x50\xc5\xcd\x6d\x16\xb3" + "\xa3\x1e\x95\x69\xad\x78\x95\x06" + "\xb9\x46\xf2\x6d\x24\x5a\x99\x76" + "\x73\x6a\x91\xa6\xac\x12\xe1\x28" + "\x79\xbc\x08\x4e\x97\x00\x98\x63" + "\x07\x1c\x4e\xd1\x68\xf3\xb3\x81" + "\xa8\xa6\x5f\xf1\x01\xc9\xc1\xaf" + "\x3a\x96\xf9\x9d\xb5\x5a\x5f\x8f" + "\x7e\xc1\x7e\x77\x0a\x40\xc8\x8e" + "\xfc\x0e\xed\xe1\x0d\xb0\xe5\x5e" + "\x5e\x6f\xf5\x7f\xab\x33\x7d\xcd" + "\xf0\x09\x4b\xb2\x11\x37\xdc\x65" + "\x97\x32\x62\x71\x3a\x29\x54\xb9" + "\xc7\xa4\xbf\x75\x0f\xf9\x40\xa9" + "\x8d\xd7\x8b\xa7\xe0\x9a\xbe\x15" + "\xc6\xda\xd8\x00\x14\x69\x1a\xaf" + "\x5f\x79\xc3\xf5\xbb\x6c\x2a\x9d" + "\xdd\x3c\x5f\x97\x21\xe1\x3a\x03" + "\x84\x6a\xe9\x76\x11\x1f\xd3\xd5" + "\xf0\x54\x20\x4d\xc2\x91\xc3\xa4" + "\x36\x25\xbe\x1b\x2a\x06\xb7\xf3" + "\xd1\xd0\x55\x29\x81\x4c\x83\xa3" + "\xa6\x84\x1e\x5c\xd1\xd0\x6c\x90" + "\xa4\x11\xf0\xd7\x63\x6a\x48\x05" + "\xbc\x48\x18\x53\xcd\xb0\x8d\xdb" + "\xdc\xfe\x55\x11\x5c\x51\xb3\xab" + "\xab\x63\x3e\x31\x5a\x8b\x93\x63" + "\x34\xa9\xba\x2b\x69\x1a\xc0\xe3" + "\xcb\x41\xbc\xd7\xf5\x7f\x82\x3e" + "\x01\xa3\x3c\x72\xf4\xfe\xdf\xbe" + "\xb1\x67\x17\x2b\x37\x60\x0d\xca" + "\x6f\xc3\x94\x2c\xd2\x92\x6d\x9d" + "\x75\x18\x77\xaa\x29\x38\x96\xed" + "\x0e\x20\x70\x92\xd5\xd0\xb4\x00" + 
"\xc0\x31\xf2\xc9\x43\x0e\x75\x1d" + "\x4b\x64\xf2\x1f\xf2\x29\x6c\x7b" + "\x7f\xec\x59\x7d\x8c\x0d\xd4\xd3" + "\xac\x53\x4c\xa3\xde\x42\x92\x95" + "\x6d\xa3\x4f\xd0\xe6\x3d\xe7\xec" + "\x7a\x4d\x68\xf1\xfe\x67\x66\x09" + "\x83\x22\xb1\x98\x43\x8c\xab\xb8" + "\x45\xe6\x6d\xdf\x5e\x50\x71\xce" + "\xf5\x4e\x40\x93\x2b\xfa\x86\x0e" + "\xe8\x30\xbd\x82\xcc\x1c\x9c\x5f" + "\xad\xfd\x08\x31\xbe\x52\xe7\xe6" + "\xf2\x06\x01\x62\x25\x15\x99\x74" + "\x33\x51\x52\x57\x3f\x57\x87\x61" + "\xb9\x7f\x29\x3d\xcd\x92\x5e\xa6" + "\x5c\x3b\xf1\xed\x5f\xeb\x82\xed" + "\x56\x7b\x61\xe7\xfd\x02\x47\x0e" + "\x2a\x15\xa4\xce\x43\x86\x9b\xe1" + "\x2b\x4c\x2a\xd9\x42\x97\xf7\x9a" + "\xe5\x47\x46\x48\xd3\x55\x6f\x4d" + "\xd9\xeb\x4b\xdd\x7b\x21\x2f\xb3" + "\xa8\x36\x28\xdf\xca\xf1\xf6\xd9" + "\x10\xf6\x1c\xfd\x2e\x0c\x27\xe0" + "\x01\xb3\xff\x6d\x47\x08\x4d\xd4" + "\x00\x25\xee\x55\x4a\xe9\xe8\x5b" + "\xd8\xf7\x56\x12\xd4\x50\xb2\xe5" + "\x51\x6f\x34\x63\x69\xd2\x4e\x96" + "\x4e\xbc\x79\xbf\x18\xae\xc6\x13" + "\x80\x92\x77\xb0\xb4\x0f\x29\x94" + "\x6f\x4c\xbb\x53\x11\x36\xc3\x9f" + "\x42\x8e\x96\x8a\x91\xc8\xe9\xfc" + "\xfe\xbf\x7c\x2d\x6f\xf9\xb8\x44" + "\x89\x1b\x09\x53\x0a\x2a\x92\xc3" + "\x54\x7a\x3a\xf9\xe2\xe4\x75\x87" + "\xa0\x5e\x4b\x03\x7a\x0d\x8a\xf4" + "\x55\x59\x94\x2b\x63\x96\x0e\xf5", + .psize = 1040, + .digest = "\xb5\xb9\x08\xb3\x24\x3e\x03\xf0" + "\xd6\x0b\x57\xbc\x0a\x6d\x89\x59", + }, { + .key = "\xf6\x34\x42\x71\x35\x52\x8b\x58" + "\x02\x3a\x8e\x4a\x8d\x41\x13\xe9" + "\x7f\xba\xb9\x55\x9d\x73\x4d\xf8" + "\x3f\x5d\x73\x15\xff\xd3\x9e\x7f" + "\x20\x2a\x6a\xa8\xd1\xf0\x8f\x12" + "\x6b\x02\xd8\x6c\xde\xba\x80\x22" + "\x19\x37\xc8\xd0\x4e\x89\x17\x7c" + "\x7c\xdd\x88\xfd\x41\xc0\x04\xb7" + "\x1d\xac\x19\xe3\x20\xc7\x16\xcf" + "\x58\xee\x1d\x7a\x61\x69\xa9\x12" + "\x4b\xef\x4f\xb6\x38\xdd\x78\xf8" + "\x28\xee\x70\x08\xc7\x7c\xcc\xc8" + "\x1e\x41\xf5\x80\x86\x70\xd0\xf0" + "\xa3\x87\x6b\x0a\x00\xd2\x41\x28" + "\x74\x26\xf1\x24\xf3\xd0\x28\x77" + 
"\xd7\xcd\xf6\x2d\x61\xf4\xa2\x13" + "\x77\xb4\x6f\xa0\xf4\xfb\xd6\xb5" + "\x38\x9d\x5a\x0c\x51\xaf\xad\x63" + "\x27\x67\x8c\x01\xea\x42\x1a\x66" + "\xda\x16\x7c\x3c\x30\x0c\x66\x53" + "\x1c\x88\xa4\x5c\xb2\xe3\x78\x0a" + "\x13\x05\x6d\xe2\xaf\xb3\xe4\x75" + "\x00\x99\x58\xee\x76\x09\x64\xaa" + "\xbb\x2e\xb1\x81\xec\xd8\x0e\xd3" + "\x0c\x33\x5d\xb7\x98\xef\x36\xb6" + "\xd2\x65\x69\x41\x70\x12\xdc\x25" + "\x41\x03\x99\x81\x41\x19\x62\x13" + "\xd1\x0a\x29\xc5\x8c\xe0\x4c\xf3" + "\xd6\xef\x4c\xf4\x1d\x83\x2e\x6d" + "\x8e\x14\x87\xed\x80\xe0\xaa\xd3" + "\x08\x04\x73\x1a\x84\x40\xf5\x64" + "\xbd\x61\x32\x65\x40\x42\xfb\xb0" + "\x40\xf6\x40\x8d\xc7\x7f\x14\xd0" + "\x83\x99\xaa\x36\x7e\x60\xc6\xbf" + "\x13\x8a\xf9\x21\xe4\x7e\x68\x87" + "\xf3\x33\x86\xb4\xe0\x23\x7e\x0a" + "\x21\xb1\xf5\xad\x67\x3c\x9c\x9d" + "\x09\xab\xaf\x5f\xba\xe0\xd0\x82" + "\x48\x22\x70\xb5\x6d\x53\xd6\x0e" + "\xde\x64\x92\x41\xb0\xd3\xfb\xda" + "\x21\xfe\xab\xea\x20\xc4\x03\x58" + "\x18\x2e\x7d\x2f\x03\xa9\x47\x66" + "\xdf\x7b\xa4\x6b\x34\x6b\x55\x9c" + "\x4f\xd7\x9c\x47\xfb\xa9\x42\xec" + "\x5a\x12\xfd\xfe\x76\xa0\x92\x9d" + "\xfe\x1e\x16\xdd\x24\x2a\xe4\x27" + "\xd5\xa9\xf2\x05\x4f\x83\xa2\xaf" + "\xfe\xee\x83\x7a\xad\xde\xdf\x9a" + "\x80\xd5\x81\x14\x93\x16\x7e\x46" + "\x47\xc2\x14\xef\x49\x6e\xb9\xdb" + "\x40\xe8\x06\x6f\x9c\x2a\xfd\x62" + "\x06\x46\xfd\x15\x1d\x36\x61\x6f" + "\x77\x77\x5e\x64\xce\x78\x1b\x85" + "\xbf\x50\x9a\xfd\x67\xa6\x1a\x65" + "\xad\x5b\x33\x30\xf1\x71\xaa\xd9" + "\x23\x0d\x92\x24\x5f\xae\x57\xb0" + "\x24\x37\x0a\x94\x12\xfb\xb5\xb1" + "\xd3\xb8\x1d\x12\x29\xb0\x80\x24" + "\x2d\x47\x9f\x96\x1f\x95\xf1\xb1" + "\xda\x35\xf6\x29\xe0\xe1\x23\x96" + "\xc7\xe8\x22\x9b\x7c\xac\xf9\x41" + "\x39\x01\xe5\x73\x15\x5e\x99\xec" + "\xb4\xc1\xf4\xe7\xa7\x97\x6a\xd5" + "\x90\x9a\xa0\x1d\xf3\x5a\x8b\x5f" + "\xdf\x01\x52\xa4\x93\x31\x97\xb0" + "\x93\x24\xb5\xbc\xb2\x14\x24\x98" + "\x4a\x8f\x19\x85\xc3\x2d\x0f\x74" + "\x9d\x16\x13\x80\x5e\x59\x62\x62" + "\x25\xe0\xd1\x2f\x64\xef\xba\xac" + 
"\xcd\x09\x07\x15\x8a\xcf\x73\xb5" + "\x8b\xc9\xd8\x24\xb0\x53\xd5\x6f" + "\xe1\x2b\x77\xb1\xc5\xe4\xa7\x0e" + "\x18\x45\xab\x36\x03\x59\xa8\xbd" + "\x43\xf0\xd8\x2c\x1a\x69\x96\xbb" + "\x13\xdf\x6c\x33\x77\xdf\x25\x34" + "\x5b\xa5\x5b\x8c\xf9\x51\x05\xd4" + "\x8b\x8b\x44\x87\x49\xfc\xa0\x8f" + "\x45\x15\x5b\x40\x42\xc4\x09\x92" + "\x98\x0c\x4d\xf4\x26\x37\x1b\x13" + "\x76\x01\x93\x8d\x4f\xe6\xed\x18" + "\xd0\x79\x7b\x3f\x44\x50\xcb\xee" + "\xf7\x4a\xc9\x9e\xe0\x96\x74\xa7" + "\xe6\x93\xb2\x53\xca\x55\xa8\xdc" + "\x1e\x68\x07\x87\xb7\x2e\xc1\x08" + "\xb2\xa4\x5b\xaf\xc6\xdb\x5c\x66" + "\x41\x1c\x51\xd9\xb0\x07\x00\x0d" + "\xf0\x4c\xdc\x93\xde\xa9\x1e\x8e" + "\xd3\x22\x62\xd8\x8b\x88\x2c\xea" + "\x5e\xf1\x6e\x14\x40\xc7\xbe\xaa" + "\x42\x28\xd0\x26\x30\x78\x01\x9b" + "\x83\x07\xbc\x94\xc7\x57\xa2\x9f" + "\x03\x07\xff\x16\xff\x3c\x6e\x48" + "\x0a\xd0\xdd\x4c\xf6\x64\x9a\xf1" + "\xcd\x30\x12\x82\x2c\x38\xd3\x26" + "\x83\xdb\xab\x3e\xc6\xf8\xe6\xfa" + "\x77\x0a\x78\x82\x75\xf8\x63\x51" + "\x59\xd0\x8d\x24\x9f\x25\xe6\xa3" + "\x4c\xbc\x34\xfc\xe3\x10\xc7\x62" + "\xd4\x23\xc8\x3d\xa7\xc6\xa6\x0a" + "\x4f\x7e\x29\x9d\x6d\xbe\xb5\xf1" + "\xdf\xa4\x53\xfa\xc0\x23\x0f\x37" + "\x84\x68\xd0\xb5\xc8\xc6\xae\xf8" + "\xb7\x8d\xb3\x16\xfe\x8f\x87\xad" + "\xd0\xc1\x08\xee\x12\x1c\x9b\x1d" + "\x90\xf8\xd1\x63\xa4\x92\x3c\xf0" + "\xc7\x34\xd8\xf1\x14\xed\xa3\xbc" + "\x17\x7e\xd4\x62\x42\x54\x57\x2c" + "\x3e\x7a\x35\x35\x17\x0f\x0b\x7f" + "\x81\xa1\x3f\xd0\xcd\xc8\x3b\x96" + "\xe9\xe0\x4a\x04\xe1\xb6\x3c\xa1" + "\xd6\xca\xc4\xbd\xb6\xb5\x95\x34" + "\x12\x9d\xc5\x96\xf2\xdf\xba\x54" + "\x76\xd1\xb2\x6b\x3b\x39\xe0\xb9" + "\x18\x62\xfb\xf7\xfc\x12\xf1\x5f" + "\x7e\xc7\xe3\x59\x4c\xa6\xc2\x3d" + "\x40\x15\xf9\xa3\x95\x64\x4c\x74" + "\x8b\x73\x77\x33\x07\xa7\x04\x1d" + "\x33\x5a\x7e\x8f\xbd\x86\x01\x4f" + "\x3e\xb9\x27\x6f\xe2\x41\xf7\x09" + "\x67\xfd\x29\x28\xc5\xe4\xf6\x18" + "\x4c\x1b\x49\xb2\x9c\x5b\xf6\x81" + "\x4f\xbb\x5c\xcc\x0b\xdf\x84\x23" + "\x58\xd6\x28\x34\x93\x3a\x25\x97" + 
"\xdf\xb2\xc3\x9e\x97\x38\x0b\x7d" + "\x10\xb3\x54\x35\x23\x8c\x64\xee" + "\xf0\xd8\x66\xff\x8b\x22\xd2\x5b" + "\x05\x16\x3c\x89\xf7\xb1\x75\xaf" + "\xc0\xae\x6a\x4f\x3f\xaf\x9a\xf4" + "\xf4\x9a\x24\xd9\x80\x82\xc0\x12" + "\xde\x96\xd1\xbe\x15\x0b\x8d\x6a" + "\xd7\x12\xe4\x85\x9f\x83\xc9\xc3" + "\xff\x0b\xb5\xaf\x3b\xd8\x6d\x67" + "\x81\x45\xe6\xac\xec\xc1\x7b\x16" + "\x18\x0a\xce\x4b\xc0\x2e\x76\xbc" + "\x1b\xfa\xb4\x34\xb8\xfc\x3e\xc8" + "\x5d\x90\x71\x6d\x7a\x79\xef\x06", + .ksize = 1088, + .plaintext = "\xaa\x5d\x54\xcb\xea\x1e\x46\x0f" + "\x45\x87\x70\x51\x8a\x66\x7a\x33" + "\xb4\x18\xff\xa9\x82\xf9\x45\x4b" + "\x93\xae\x2e\x7f\xab\x98\xfe\xbf" + "\x01\xee\xe5\xa0\x37\x8f\x57\xa6" + "\xb0\x76\x0d\xa4\xd6\x28\x2b\x5d" + "\xe1\x03\xd6\x1c\x6f\x34\x0d\xe7" + "\x61\x2d\x2e\xe5\xae\x5d\x47\xc7" + "\x80\x4b\x18\x8f\xa8\x99\xbc\x28" + "\xed\x1d\x9d\x86\x7d\xd7\x41\xd1" + "\xe0\x2b\xe1\x8c\x93\x2a\xa7\x80" + "\xe1\x07\xa0\xa9\x9f\x8c\x8d\x1a" + "\x55\xfc\x6b\x24\x7a\xbd\x3e\x51" + "\x68\x4b\x26\x59\xc8\xa7\x16\xd9" + "\xb9\x61\x13\xde\x8b\x63\x1c\xf6" + "\x60\x01\xfb\x08\xb3\x5b\x0a\xbf" + "\x34\x73\xda\x87\x87\x3d\x6f\x97" + "\x4a\x0c\xa3\x58\x20\xa2\xc0\x81" + "\x5b\x8c\xef\xa9\xc2\x01\x1e\x64" + "\x83\x8c\xbc\x03\xb6\xd0\x29\x9f" + "\x54\xe2\xce\x8b\xc2\x07\x85\x78" + "\x25\x38\x96\x4c\xb4\xbe\x17\x4a" + "\x65\xa6\xfa\x52\x9d\x66\x9d\x65" + "\x4a\xd1\x01\x01\xf0\xcb\x13\xcc" + "\xa5\x82\xf3\xf2\x66\xcd\x3f\x9d" + "\xd1\xaa\xe4\x67\xea\xf2\xad\x88" + "\x56\x76\xa7\x9b\x59\x3c\xb1\x5d" + "\x78\xfd\x69\x79\x74\x78\x43\x26" + "\x7b\xde\x3f\xf1\xf5\x4e\x14\xd9" + "\x15\xf5\x75\xb5\x2e\x19\xf3\x0c" + "\x48\x72\xd6\x71\x6d\x03\x6e\xaa" + "\xa7\x08\xf9\xaa\x70\xa3\x0f\x4d" + "\x12\x8a\xdd\xe3\x39\x73\x7e\xa7" + "\xea\x1f\x6d\x06\x26\x2a\xf2\xc5" + "\x52\xb4\xbf\xfd\x52\x0c\x06\x60" + "\x90\xd1\xb2\x7b\x56\xae\xac\x58" + "\x5a\x6b\x50\x2a\xf5\xe0\x30\x3c" + "\x2a\x98\x0f\x1b\x5b\x0a\x84\x6c" + "\x31\xae\x92\xe2\xd4\xbb\x7f\x59" + "\x26\x10\xb9\x89\x37\x68\x26\xbf" + 
"\x41\xc8\x49\xc4\x70\x35\x7d\xff" + "\x2d\x7f\xf6\x8a\x93\x68\x8c\x78" + "\x0d\x53\xce\x7d\xff\x7d\xfb\xae" + "\x13\x1b\x75\xc4\x78\xd7\x71\xd8" + "\xea\xd3\xf4\x9d\x95\x64\x8e\xb4" + "\xde\xb8\xe4\xa6\x68\xc8\xae\x73" + "\x58\xaf\xa8\xb0\x5a\x20\xde\x87" + "\x43\xb9\x0f\xe3\xad\x41\x4b\xd5" + "\xb7\xad\x16\x00\xa6\xff\xf6\x74" + "\xbf\x8c\x9f\xb3\x58\x1b\xb6\x55" + "\xa9\x90\x56\x28\xf0\xb5\x13\x4e" + "\x9e\xf7\x25\x86\xe0\x07\x7b\x98" + "\xd8\x60\x5d\x38\x95\x3c\xe4\x22" + "\x16\x2f\xb2\xa2\xaf\xe8\x90\x17" + "\xec\x11\x83\x1a\xf4\xa9\x26\xda" + "\x39\x72\xf5\x94\x61\x05\x51\xec" + "\xa8\x30\x8b\x2c\x13\xd0\x72\xac" + "\xb9\xd2\xa0\x4c\x4b\x78\xe8\x6e" + "\x04\x85\xe9\x04\x49\x82\x91\xff" + "\x89\xe5\xab\x4c\xaa\x37\x03\x12" + "\xca\x8b\x74\x10\xfd\x9e\xd9\x7b" + "\xcb\xdb\x82\x6e\xce\x2e\x33\x39" + "\xce\xd2\x84\x6e\x34\x71\x51\x6e" + "\x0d\xd6\x01\x87\xc7\xfa\x0a\xd3" + "\xad\x36\xf3\x4c\x9f\x96\x5e\x62" + "\x62\x54\xc3\x03\x78\xd6\xab\xdd" + "\x89\x73\x55\x25\x30\xf8\xa7\xe6" + "\x4f\x11\x0c\x7c\x0a\xa1\x2b\x7b" + "\x3d\x0d\xde\x81\xd4\x9d\x0b\xae" + "\xdf\x00\xf9\x4c\xb6\x90\x8e\x16" + "\xcb\x11\xc8\xd1\x2e\x73\x13\x75" + "\x75\x3e\xaa\xf5\xee\x02\xb3\x18" + "\xa6\x2d\xf5\x3b\x51\xd1\x1f\x47" + "\x6b\x2c\xdb\xc4\x10\xe0\xc8\xba" + "\x9d\xac\xb1\x9d\x75\xd5\x41\x0e" + "\x7e\xbe\x18\x5b\xa4\x1f\xf8\x22" + "\x4c\xc1\x68\xda\x6d\x51\x34\x6c" + "\x19\x59\xec\xb5\xb1\xec\xa7\x03" + "\xca\x54\x99\x63\x05\x6c\xb1\xac" + "\x9c\x31\xd6\xdb\xba\x7b\x14\x12" + "\x7a\xc3\x2f\xbf\x8d\xdc\x37\x46" + "\xdb\xd2\xbc\xd4\x2f\xab\x30\xd5" + "\xed\x34\x99\x8e\x83\x3e\xbe\x4c" + "\x86\x79\x58\xe0\x33\x8d\x9a\xb8" + "\xa9\xa6\x90\x46\xa2\x02\xb8\xdd" + "\xf5\xf9\x1a\x5c\x8c\x01\xaa\x6e" + "\xb4\x22\x12\xf5\x0c\x1b\x9b\x7a" + "\xc3\x80\xf3\x06\x00\x5f\x30\xd5" + "\x06\xdb\x7d\x82\xc2\xd4\x0b\x4c" + "\x5f\xe9\xc5\xf5\xdf\x97\x12\xbf" + "\x56\xaf\x9b\x69\xcd\xee\x30\xb4" + "\xa8\x71\xff\x3e\x7d\x73\x7a\xb4" + "\x0d\xa5\x46\x7a\xf3\xf4\x15\x87" + "\x5d\x93\x2b\x8c\x37\x64\xb5\xdd" + 
"\x48\xd1\xe5\x8c\xae\xd4\xf1\x76" + "\xda\xf4\xba\x9e\x25\x0e\xad\xa3" + "\x0d\x08\x7c\xa8\x82\x16\x8d\x90" + "\x56\x40\x16\x84\xe7\x22\x53\x3a" + "\x58\xbc\xb9\x8f\x33\xc8\xc2\x84" + "\x22\xe6\x0d\xe7\xb3\xdc\x5d\xdf" + "\xd7\x2a\x36\xe4\x16\x06\x07\xd2" + "\x97\x60\xb2\xf5\x5e\x14\xc9\xfd" + "\x8b\x05\xd1\xce\xee\x9a\x65\x99" + "\xb7\xae\x19\xb7\xc8\xbc\xd5\xa2" + "\x7b\x95\xe1\xcc\xba\x0d\xdc\x8a" + "\x1d\x59\x52\x50\xaa\x16\x02\x82" + "\xdf\x61\x33\x2e\x44\xce\x49\xc7" + "\xe5\xc6\x2e\x76\xcf\x80\x52\xf0" + "\x3d\x17\x34\x47\x3f\xd3\x80\x48" + "\xa2\xba\xd5\xc7\x7b\x02\x28\xdb" + "\xac\x44\xc7\x6e\x05\x5c\xc2\x79" + "\xb3\x7d\x6a\x47\x77\x66\xf1\x38" + "\xf0\xf5\x4f\x27\x1a\x31\xca\x6c" + "\x72\x95\x92\x8e\x3f\xb0\xec\x1d" + "\xc7\x2a\xff\x73\xee\xdf\x55\x80" + "\x93\xd2\xbd\x34\xd3\x9f\x00\x51" + "\xfb\x2e\x41\xba\x6c\x5a\x7c\x17" + "\x7f\xe6\x70\xac\x8d\x39\x3f\x77" + "\xe2\x23\xac\x8f\x72\x4e\xe4\x53" + "\xcc\xf1\x1b\xf1\x35\xfe\x52\xa4" + "\xd6\xb8\x40\x6b\xc1\xfd\xa0\xa1" + "\xf5\x46\x65\xc2\x50\xbb\x43\xe2" + "\xd1\x43\x28\x34\x74\xf5\x87\xa0" + "\xf2\x5e\x27\x3b\x59\x2b\x3e\x49" + "\xdf\x46\xee\xaf\x71\xd7\x32\x36" + "\xc7\x14\x0b\x58\x6e\x3e\x2d\x41" + "\xfa\x75\x66\x3a\x54\xe0\xb2\xb9" + "\xaf\xdd\x04\x80\x15\x19\x3f\x6f" + "\xce\x12\xb4\xd8\xe8\x89\x3c\x05" + "\x30\xeb\xf3\x3d\xcd\x27\xec\xdc" + "\x56\x70\x12\xcf\x78\x2b\x77\xbf" + "\x22\xf0\x1b\x17\x9c\xcc\xd6\x1b" + "\x2d\x3d\xa0\x3b\xd8\xc9\x70\xa4" + "\x7a\x3e\x07\xb9\x06\xc3\xfa\xb0" + "\x33\xee\xc1\xd8\xf6\xe0\xf0\xb2" + "\x61\x12\x69\xb0\x5f\x28\x99\xda" + "\xc3\x61\x48\xfa\x07\x16\x03\xc4" + "\xa8\xe1\x3c\xe8\x0e\x64\x15\x30" + "\xc1\x9d\x84\x2f\x73\x98\x0e\x3a" + "\xf2\x86\x21\xa4\x9e\x1d\xb5\x86" + "\x16\xdb\x2b\x9a\x06\x64\x8e\x79" + "\x8d\x76\x3e\xc3\xc2\x64\x44\xe3" + "\xda\xbc\x1a\x52\xd7\x61\x03\x65" + "\x54\x32\x77\x01\xed\x9d\x8a\x43" + "\x25\x24\xe3\xc1\xbe\xb8\x2f\xcb" + "\x89\x14\x64\xab\xf6\xa0\x6e\x02" + "\x57\xe4\x7d\xa9\x4e\x9a\x03\x36" + "\xad\xf1\xb1\xfc\x0b\xe6\x79\x51" + 
"\x9f\x81\x77\xc4\x14\x78\x9d\xbf" + "\xb6\xd6\xa3\x8c\xba\x0b\x26\xe7" + "\xc8\xb9\x5c\xcc\xe1\x5f\xd5\xc6" + "\xc4\xca\xc2\xa3\x45\xba\x94\x13" + "\xb2\x8f\xc3\x54\x01\x09\xe7\x8b" + "\xda\x2a\x0a\x11\x02\x43\xcb\x57" + "\xc9\xcc\xb5\x5c\xab\xc4\xec\x54" + "\x00\x06\x34\xe1\x6e\x03\x89\x7c" + "\xc6\xfb\x6a\xc7\x60\x43\xd6\xc5" + "\xb5\x68\x72\x89\x8f\x42\xc3\x74" + "\xbd\x25\xaa\x9f\x67\xb5\xdf\x26" + "\x20\xe8\xb7\x01\x3c\xe4\x77\xce" + "\xc4\x65\xa7\x23\x79\xea\x33\xc7" + "\x82\x14\x5c\x82\xf2\x4e\x3d\xf6" + "\xc6\x4a\x0e\x29\xbb\xec\x44\xcd" + "\x2f\xd1\x4f\x21\x71\xa9\xce\x0f" + "\x5c\xf2\x72\x5c\x08\x2e\x21\xd2" + "\xc3\x29\x13\xd8\xac\xc3\xda\x13" + "\x1a\x9d\xa7\x71\x1d\x27\x1d\x27" + "\x1d\xea\xab\x44\x79\xad\xe5\xeb" + "\xef\x1f\x22\x0a\x44\x4f\xcb\x87" + "\xa7\x58\x71\x0e\x66\xf8\x60\xbf" + "\x60\x74\x4a\xb4\xec\x2e\xfe\xd3" + "\xf5\xb8\xfe\x46\x08\x50\x99\x6c" + "\x66\xa5\xa8\x34\x44\xb5\xe5\xf0" + "\xdd\x2c\x67\x4e\x35\x96\x8e\x67" + "\x48\x3f\x5f\x37\x44\x60\x51\x2e" + "\x14\x91\x5e\x57\xc3\x0e\x79\x77" + "\x2f\x03\xf4\xe2\x1c\x72\xbf\x85" + "\x5d\xd3\x17\xdf\x6c\xc5\x70\x24" + "\x42\xdf\x51\x4e\x2a\xb2\xd2\x5b" + "\x9e\x69\x83\x41\x11\xfe\x73\x22" + "\xde\x8a\x9e\xd8\x8a\xfb\x20\x38" + "\xd8\x47\x6f\xd5\xed\x8f\x41\xfd" + "\x13\x7a\x18\x03\x7d\x0f\xcd\x7d" + "\xa6\x7d\x31\x9e\xf1\x8f\x30\xa3" + "\x8b\x4c\x24\xb7\xf5\x48\xd7\xd9" + "\x12\xe7\x84\x97\x5c\x31\x6d\xfb" + "\xdf\xf3\xd3\xd1\xd5\x0c\x30\x06" + "\x01\x6a\xbc\x6c\x78\x7b\xa6\x50" + "\xfa\x0f\x3c\x42\x2d\xa5\xa3\x3b" + "\xcf\x62\x50\xff\x71\x6d\xe7\xda" + "\x27\xab\xc6\x67\x16\x65\x68\x64" + "\xc7\xd5\x5f\x81\xa9\xf6\x65\xb3" + "\x5e\x43\x91\x16\xcd\x3d\x55\x37" + "\x55\xb3\xf0\x28\xc5\x54\x19\xc0" + "\xe0\xd6\x2a\x61\xd4\xc8\x72\x51" + "\xe9\xa1\x7b\x48\x21\xad\x44\x09" + "\xe4\x01\x61\x3c\x8a\x5b\xf9\xa1" + "\x6e\x1b\xdf\xc0\x04\xa8\x8b\xf2" + "\x21\xbe\x34\x7b\xfc\xa1\xcd\xc9" + "\xa9\x96\xf4\xa4\x4c\xf7\x4e\x8f" + "\x84\xcc\xd3\xa8\x92\x77\x8f\x36" + "\xe2\x2e\x8c\x33\xe8\x84\xa6\x0c" + 
"\x6c\x8a\xda\x14\x32\xc2\x96\xff" + "\xc6\x4a\xc2\x9b\x30\x7f\xd1\x29" + "\xc0\xd5\x78\x41\x00\x80\x80\x03" + "\x2a\xb1\xde\x26\x03\x48\x49\xee" + "\x57\x14\x76\x51\x3c\x36\x5d\x0a" + "\x5c\x9f\xe8\xd8\x53\xdb\x4f\xd4" + "\x38\xbf\x66\xc9\x75\x12\x18\x75" + "\x34\x2d\x93\x22\x96\x51\x24\x6e" + "\x4e\xd9\x30\xea\x67\xff\x92\x1c" + "\x16\x26\xe9\xb5\x33\xab\x8c\x22" + "\x47\xdb\xa0\x2c\x08\xf0\x12\x69" + "\x7e\x93\x52\xda\xa5\xe5\xca\xc1" + "\x0f\x55\x2a\xbd\x09\x30\x88\x1b" + "\x9c\xc6\x9f\xe6\xdb\xa6\x92\xeb" + "\xf4\xbd\x5c\xc4\xdb\xc6\x71\x09" + "\xab\x5e\x48\x0c\xed\x6f\xda\x8e" + "\x8d\x0c\x98\x71\x7d\x10\xd0\x9c" + "\x20\x9b\x79\x53\x26\x5d\xb9\x85" + "\x8a\x31\xb8\xc5\x1c\x97\xde\x88" + "\x61\x55\x7f\x7c\x21\x06\xea\xc4" + "\x5f\xaf\xf2\xf0\xd5\x5e\x7d\xb4" + "\x6e\xcf\xe9\xae\x1b\x0e\x11\x80" + "\xc1\x9a\x74\x7e\x52\x6f\xa0\xb7" + "\x24\xcd\x8d\x0a\x11\x40\x63\x72" + "\xfa\xe2\xc5\xb3\x94\xef\x29\xa2" + "\x1a\x23\x43\x04\x37\x55\x0d\xe9" + "\x83\xb2\x29\x51\x49\x64\xa0\xbd" + "\xde\x73\xfd\xa5\x7c\x95\x70\x62" + "\x58\xdc\xe2\xd0\xbf\x98\xf5\x8a" + "\x6a\xfd\xce\xa8\x0e\x42\x2a\xeb" + "\xd2\xff\x83\x27\x53\x5c\xa0\x6e" + "\x93\xef\xe2\xb9\x5d\x35\xd6\x98" + "\xf6\x71\x19\x7a\x54\xa1\xa7\xe8" + "\x09\xfe\xf6\x9e\xc7\xbd\x3e\x29" + "\xbd\x6b\x17\xf4\xe7\x3e\x10\x5c" + "\xc1\xd2\x59\x4f\x4b\x12\x1a\x5b" + "\x50\x80\x59\xb9\xec\x13\x66\xa8" + "\xd2\x31\x7b\x6a\x61\x22\xdd\x7d" + "\x61\xee\x87\x16\x46\x9f\xf9\xc7" + "\x41\xee\x74\xf8\xd0\x96\x2c\x76" + "\x2a\xac\x7d\x6e\x9f\x0e\x7f\x95" + "\xfe\x50\x16\xb2\x23\xca\x62\xd5" + "\x68\xcf\x07\x3f\x3f\x97\x85\x2a" + "\x0c\x25\x45\xba\xdb\x32\xcb\x83" + "\x8c\x4f\xe0\x6d\x9a\x99\xf9\xc9" + "\xda\xd4\x19\x31\xc1\x7c\x6d\xd9" + "\x9c\x56\xd3\xec\xc1\x81\x4c\xed" + "\x28\x9d\x87\xeb\x19\xd7\x1a\x4f" + "\x04\x6a\xcb\x1f\xcf\x1f\xa2\x16" + "\xfc\x2a\x0d\xa1\x14\x2d\xfa\xc5" + "\x5a\xd2\xc5\xf9\x19\x7c\x20\x1f" + "\x2d\x10\xc0\x66\x7c\xd9\x2d\xe5" + "\x88\x70\x59\xa7\x85\xd5\x2e\x7c" + "\x5c\xe3\xb7\x12\xd6\x97\x3f\x29", + 
.psize = 2048, + .digest = "\x37\x90\x92\xc2\xeb\x01\x87\xd9" + "\x95\xc7\x91\xc3\x17\x8b\x38\x52", + } +}; + + /* * DES test vectors. */ @@ -11449,6 +12797,82 @@ static const struct cipher_testvec aes_cbc_tv_template[] = { }, }; +static const struct cipher_testvec aes_cfb_tv_template[] = { + { /* From NIST SP800-38A */ + .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" + "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + .klen = 16, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ctext = "\x3b\x3f\xd9\x2e\xb7\x2d\xad\x20" + "\x33\x34\x49\xf8\xe8\x3c\xfb\x4a" + "\xc8\xa6\x45\x37\xa0\xb3\xa9\x3f" + "\xcd\xe3\xcd\xad\x9f\x1c\xe5\x8b" + "\x26\x75\x1f\x67\xa3\xcb\xb1\x40" + "\xb1\x80\x8c\xf1\x87\xa4\xf4\xdf" + "\xc0\x4b\x05\x35\x7c\x5d\x1c\x0e" + "\xea\xc4\xc6\x6f\x9f\xf7\xf2\xe6", + .len = 64, + }, { + .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52" + "\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + .klen = 24, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ctext = "\xcd\xc8\x0d\x6f\xdd\xf1\x8c\xab" + "\x34\xc2\x59\x09\xc9\x9a\x41\x74" + "\x67\xce\x7f\x7f\x81\x17\x36\x21" + "\x96\x1a\x2b\x70\x17\x1d\x3d\x7a" + "\x2e\x1e\x8a\x1d\xd5\x9b\x88\xb1" + "\xc8\xe6\x0f\xed\x1e\xfa\xc4\xc9" + "\xc0\x5f\x9f\x9c\xa9\x83\x4f\xa0" + "\x42\xae\x8f\xba\x58\x4b\x09\xff", + .len = 64, + }, { + .key = 
"\x60\x3d\xeb\x10\x15\xca\x71\xbe" + "\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" + "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + .klen = 32, + .iv = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + .ctext = "\xdc\x7e\x84\xbf\xda\x79\x16\x4b" + "\x7e\xcd\x84\x86\x98\x5d\x38\x60" + "\x39\xff\xed\x14\x3b\x28\xb1\xc8" + "\x32\x11\x3c\x63\x31\xe5\x40\x7b" + "\xdf\x10\x13\x24\x15\xe5\x4b\x92" + "\xa1\x3e\xd0\xa8\x26\x7a\xe2\xf9" + "\x75\xa3\x85\x74\x1a\xb9\xce\xf8" + "\x20\x31\x62\x3d\x55\xb1\xe4\x71", + .len = 64, + }, +}; + static const struct aead_testvec hmac_md5_ecb_cipher_null_enc_tv_template[] = { { /* Input data from RFC 2410 Case 1 */ #ifdef __LITTLE_ENDIAN @@ -30802,6 +32226,1794 @@ static const struct cipher_testvec chacha20_tv_template[] = { }, }; +static const struct cipher_testvec xchacha20_tv_template[] = { + { /* from libsodium test/default/xchacha20.c */ + .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b" + "\xbb\x27\x04\xc9\x5c\x34\x1e\x32" + "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e" + "\x52\xff\x45\xb2\x4f\x30\x4f\xc4", + .klen = 32, + .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf" + "\xbc\x9a\xee\x49\x41\x76\x88\xa0" + "\xa2\x55\x4f\x8d\x95\x38\x94\x19" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00", + .ctext = "\xc6\xe9\x75\x81\x60\x08\x3a\xc6" + "\x04\xef\x90\xe7\x12\xce\x6e\x75" + "\xd7\x79\x75\x90\x74\x4e\x0c\xf0" + "\x60\xf0\x13\x73\x9c", + .len = 29, + }, { /* from libsodium test/default/xchacha20.c */ + .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c" + "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98" + 
"\x08\xcb\x0e\x50\xcd\x0f\x67\x81" + "\x22\x35\xea\xaf\x60\x1d\x62\x32", + .klen = 32, + .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70" + "\xd3\x35\x66\xa2\x42\x5c\xbf\x30" + "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00", + .ctext = "\xa2\x12\x09\x09\x65\x94\xde\x8c" + "\x56\x67\xb1\xd1\x3a\xd9\x3f\x74" + "\x41\x06\xd0\x54\xdf\x21\x0e\x47" + "\x82\xcd\x39\x6f\xec\x69\x2d\x35" + "\x15\xa2\x0b\xf3\x51\xee\xc0\x11" + "\xa9\x2c\x36\x78\x88\xbc\x46\x4c" + "\x32\xf0\x80\x7a\xcd\x6c\x20\x3a" + "\x24\x7e\x0d\xb8\x54\x14\x84\x68" + "\xe9\xf9\x6b\xee\x4c\xf7\x18\xd6" + "\x8d\x5f\x63\x7c\xbd\x5a\x37\x64" + "\x57\x78\x8e\x6f\xae\x90\xfc\x31" + "\x09\x7c\xfc", + .len = 91, + }, { /* Taken from the ChaCha20 test vectors, appended 12 random bytes + to the nonce, zero-padded the stream position from 4 to 8 bytes, + and recomputed the ciphertext using libsodium's XChaCha20 */ + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x67\xc6\x69\x73" + "\x51\xff\x4a\xec\x29\xcd\xba\xab" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ctext = 
"\x9c\x49\x2a\xe7\x8a\x2f\x93\xc7" + "\xb3\x33\x6f\x82\x17\xd8\xc4\x1e" + "\xad\x80\x11\x11\x1d\x4c\x16\x18" + "\x07\x73\x9b\x4f\xdb\x7c\xcb\x47" + "\xfd\xef\x59\x74\xfa\x3f\xe5\x4c" + "\x9b\xd0\xea\xbc\xba\x56\xad\x32" + "\x03\xdc\xf8\x2b\xc1\xe1\x75\x67" + "\x23\x7b\xe6\xfc\xd4\x03\x86\x54", + .len = 64, + }, { /* Derived from a ChaCha20 test vector, via the process above */ + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\xf2\xfb\xe3\x46" + "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d" + "\x01\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x41\x6e\x79\x20\x73\x75\x62\x6d" + "\x69\x73\x73\x69\x6f\x6e\x20\x74" + "\x6f\x20\x74\x68\x65\x20\x49\x45" + "\x54\x46\x20\x69\x6e\x74\x65\x6e" + "\x64\x65\x64\x20\x62\x79\x20\x74" + "\x68\x65\x20\x43\x6f\x6e\x74\x72" + "\x69\x62\x75\x74\x6f\x72\x20\x66" + "\x6f\x72\x20\x70\x75\x62\x6c\x69" + "\x63\x61\x74\x69\x6f\x6e\x20\x61" + "\x73\x20\x61\x6c\x6c\x20\x6f\x72" + "\x20\x70\x61\x72\x74\x20\x6f\x66" + "\x20\x61\x6e\x20\x49\x45\x54\x46" + "\x20\x49\x6e\x74\x65\x72\x6e\x65" + "\x74\x2d\x44\x72\x61\x66\x74\x20" + "\x6f\x72\x20\x52\x46\x43\x20\x61" + "\x6e\x64\x20\x61\x6e\x79\x20\x73" + "\x74\x61\x74\x65\x6d\x65\x6e\x74" + "\x20\x6d\x61\x64\x65\x20\x77\x69" + "\x74\x68\x69\x6e\x20\x74\x68\x65" + "\x20\x63\x6f\x6e\x74\x65\x78\x74" + "\x20\x6f\x66\x20\x61\x6e\x20\x49" + "\x45\x54\x46\x20\x61\x63\x74\x69" + "\x76\x69\x74\x79\x20\x69\x73\x20" + "\x63\x6f\x6e\x73\x69\x64\x65\x72" + "\x65\x64\x20\x61\x6e\x20\x22\x49" + "\x45\x54\x46\x20\x43\x6f\x6e\x74" + "\x72\x69\x62\x75\x74\x69\x6f\x6e" + "\x22\x2e\x20\x53\x75\x63\x68\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x63\x6c\x75" + "\x64\x65\x20\x6f\x72\x61\x6c\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x20\x49\x45" + "\x54\x46\x20\x73\x65\x73\x73\x69" + 
"\x6f\x6e\x73\x2c\x20\x61\x73\x20" + "\x77\x65\x6c\x6c\x20\x61\x73\x20" + "\x77\x72\x69\x74\x74\x65\x6e\x20" + "\x61\x6e\x64\x20\x65\x6c\x65\x63" + "\x74\x72\x6f\x6e\x69\x63\x20\x63" + "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" + "\x74\x69\x6f\x6e\x73\x20\x6d\x61" + "\x64\x65\x20\x61\x74\x20\x61\x6e" + "\x79\x20\x74\x69\x6d\x65\x20\x6f" + "\x72\x20\x70\x6c\x61\x63\x65\x2c" + "\x20\x77\x68\x69\x63\x68\x20\x61" + "\x72\x65\x20\x61\x64\x64\x72\x65" + "\x73\x73\x65\x64\x20\x74\x6f", + .ctext = "\xf9\xab\x7a\x4a\x60\xb8\x5f\xa0" + "\x50\xbb\x57\xce\xef\x8c\xc1\xd9" + "\x24\x15\xb3\x67\x5e\x7f\x01\xf6" + "\x1c\x22\xf6\xe5\x71\xb1\x43\x64" + "\x63\x05\xd5\xfc\x5c\x3d\xc0\x0e" + "\x23\xef\xd3\x3b\xd9\xdc\x7f\xa8" + "\x58\x26\xb3\xd0\xc2\xd5\x04\x3f" + "\x0a\x0e\x8f\x17\xe4\xcd\xf7\x2a" + "\xb4\x2c\x09\xe4\x47\xec\x8b\xfb" + "\x59\x37\x7a\xa1\xd0\x04\x7e\xaa" + "\xf1\x98\x5f\x24\x3d\x72\x9a\x43" + "\xa4\x36\x51\x92\x22\x87\xff\x26" + "\xce\x9d\xeb\x59\x78\x84\x5e\x74" + "\x97\x2e\x63\xc0\xef\x29\xf7\x8a" + "\xb9\xee\x35\x08\x77\x6a\x35\x9a" + "\x3e\xe6\x4f\x06\x03\x74\x1b\xc1" + "\x5b\xb3\x0b\x89\x11\x07\xd3\xb7" + "\x53\xd6\x25\x04\xd9\x35\xb4\x5d" + "\x4c\x33\x5a\xc2\x42\x4c\xe6\xa4" + "\x97\x6e\x0e\xd2\xb2\x8b\x2f\x7f" + "\x28\xe5\x9f\xac\x4b\x2e\x02\xab" + "\x85\xfa\xa9\x0d\x7c\x2d\x10\xe6" + "\x91\xab\x55\x63\xf0\xde\x3a\x94" + "\x25\x08\x10\x03\xc2\x68\xd1\xf4" + "\xaf\x7d\x9c\x99\xf7\x86\x96\x30" + "\x60\xfc\x0b\xe6\xa8\x80\x15\xb0" + "\x81\xb1\x0c\xbe\xb9\x12\x18\x25" + "\xe9\x0e\xb1\xe7\x23\xb2\xef\x4a" + "\x22\x8f\xc5\x61\x89\xd4\xe7\x0c" + "\x64\x36\x35\x61\xb6\x34\x60\xf7" + "\x7b\x61\x37\x37\x12\x10\xa2\xf6" + "\x7e\xdb\x7f\x39\x3f\xb6\x8e\x89" + "\x9e\xf3\xfe\x13\x98\xbb\x66\x5a" + "\xec\xea\xab\x3f\x9c\x87\xc4\x8c" + "\x8a\x04\x18\x49\xfc\x77\x11\x50" + "\x16\xe6\x71\x2b\xee\xc0\x9c\xb6" + "\x87\xfd\x80\xff\x0b\x1d\x73\x38" + "\xa4\x1d\x6f\xae\xe4\x12\xd7\x93" + "\x9d\xcd\x38\x26\x09\x40\x52\xcd" + "\x67\x01\x67\x26\xe0\x3e\x98\xa8" + 
"\xe8\x1a\x13\x41\xbb\x90\x4d\x87" + "\xbb\x42\x82\x39\xce\x3a\xd0\x18" + "\x6d\x7b\x71\x8f\xbb\x2c\x6a\xd1" + "\xbd\xf5\xc7\x8a\x7e\xe1\x1e\x0f" + "\x0d\x0d\x13\x7c\xd9\xd8\x3c\x91" + "\xab\xff\x1f\x12\xc3\xee\xe5\x65" + "\x12\x8d\x7b\x61\xe5\x1f\x98", + .len = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + + }, { /* Derived from a ChaCha20 test vector, via the process above */ + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\x76\x5a\x2e\x63" + "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7" + "\x2a\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x27\x54\x77\x61\x73\x20\x62\x72" + "\x69\x6c\x6c\x69\x67\x2c\x20\x61" + "\x6e\x64\x20\x74\x68\x65\x20\x73" + "\x6c\x69\x74\x68\x79\x20\x74\x6f" + "\x76\x65\x73\x0a\x44\x69\x64\x20" + "\x67\x79\x72\x65\x20\x61\x6e\x64" + "\x20\x67\x69\x6d\x62\x6c\x65\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x77" + "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" + "\x20\x6d\x69\x6d\x73\x79\x20\x77" + "\x65\x72\x65\x20\x74\x68\x65\x20" + "\x62\x6f\x72\x6f\x67\x6f\x76\x65" + "\x73\x2c\x0a\x41\x6e\x64\x20\x74" + "\x68\x65\x20\x6d\x6f\x6d\x65\x20" + "\x72\x61\x74\x68\x73\x20\x6f\x75" + "\x74\x67\x72\x61\x62\x65\x2e", + .ctext = "\x95\xb9\x51\xe7\x8f\xb4\xa4\x03" + "\xca\x37\xcc\xde\x60\x1d\x8c\xe2" + "\xf1\xbb\x8a\x13\x7f\x61\x85\xcc" + "\xad\xf4\xf0\xdc\x86\xa6\x1e\x10" + "\xbc\x8e\xcb\x38\x2b\xa5\xc8\x8f" + "\xaa\x03\x3d\x53\x4a\x42\xb1\x33" + "\xfc\xd3\xef\xf0\x8e\x7e\x10\x9c" + "\x6f\x12\x5e\xd4\x96\xfe\x5b\x08" + "\xb6\x48\xf0\x14\x74\x51\x18\x7c" + "\x07\x92\xfc\xac\x9d\xf1\x94\xc0" + "\xc1\x9d\xc5\x19\x43\x1f\x1d\xbb" + "\x07\xf0\x1b\x14\x25\x45\xbb\xcb" + "\x5c\xe2\x8b\x28\xf3\xcf\x47\x29" + "\x27\x79\x67\x24\xa6\x87\xc2\x11" + "\x65\x03\xfa\x45\xf7\x9e\x53\x7a" + "\x99\xf1\x82\x25\x4f\x8d\x07", + .len = 127, + }, { /* Derived from a ChaCha20 test vector, via the 
process above */ + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x01\x31\x58\xa3\x5a" + "\x25\x5d\x05\x17\x58\xe9\x5e\xd4" + "\x1c\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x49\xee\xe0\xdc\x24\x90\x40\xcd" + "\xc5\x40\x8f\x47\x05\xbc\xdd\x81" + "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb" + "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8" + "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4" + "\x19\x4b\x01\x0f\x4e\xa4\x43\xce" + "\x01\xc6\x67\xda\x03\x91\x18\x90" + "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac" + "\x74\x92\xd3\x53\x47\xc8\xdd\x25" + "\x53\x6c\x02\x03\x87\x0d\x11\x0c" + "\x58\xe3\x12\x18\xfd\x2a\x5b\x40" + "\x0c\x30\xf0\xb8\x3f\x43\xce\xae" + "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc" + "\x33\x97\xc3\x77\xba\xc5\x70\xde" + "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f" + "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c" + "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c" + "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe" + "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6" + "\x79\x49\x41\xf4\x58\x18\xcb\x86" + "\x7f\x30\x0e\xf8\x7d\x44\x36\xea" + "\x75\xeb\x88\x84\x40\x3c\xad\x4f" + "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5" + "\x21\x66\xe9\xa7\xe3\xb2\x15\x88" + "\x78\xf6\x79\xa1\x59\x47\x12\x4e" + "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08" + "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b" + "\xdd\x60\x71\xf7\x47\x8c\x61\xc3" + "\xda\x8a\x78\x1e\x16\xfa\x1e\x86" + "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7" + "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4" + "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6" + "\xf4\xe6\x33\x43\x84\x93\xa5\x67" + "\x9b\x16\x58\x58\x80\x0f\x2b\x5c" + "\x24\x74\x75\x7f\x95\x81\xb7\x30" + "\x7a\x33\xa7\xf7\x94\x87\x32\x27" + "\x10\x5d\x14\x4c\x43\x29\xdd\x26" + "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10" + "\xea\x6b\x64\xfd\x73\xc6\xed\xec" + "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07" + "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5" + "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1" + "\xec\xca\x60\x09\x4c\x6a\xd5\x09" + "\x49\x46\x00\x88\x22\x8d\xce\xea" + 
"\xb1\x17\x11\xde\x42\xd2\x23\xc1" + "\x72\x11\xf5\x50\x73\x04\x40\x47" + "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0" + "\x3f\x58\xc1\x52\xab\x12\x67\x9d" + "\x3f\x43\x4b\x68\xd4\x9c\x68\x38" + "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b" + "\xf9\xe5\x31\x69\x22\xf9\xa6\x69" + "\xc6\x9c\x96\x9a\x12\x35\x95\x1d" + "\x95\xd5\xdd\xbe\xbf\x93\x53\x24" + "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00" + "\x6f\x88\xc4\x37\x18\x69\x7c\xd7" + "\x41\x92\x55\x4c\x03\xa1\x9a\x4b" + "\x15\xe5\xdf\x7f\x37\x33\x72\xc1" + "\x8b\x10\x67\xa3\x01\x57\x94\x25" + "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73" + "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda" + "\x58\xb1\x47\x90\xfe\x42\x21\x72" + "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd" + "\xc6\x84\x6e\xca\xae\xe3\x68\xb4" + "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b" + "\x03\xa1\x31\xd9\xde\x8d\xf5\x22" + "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76" + "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe" + "\x54\xf7\x27\x1b\xf4\xde\x02\xf5" + "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e" + "\x4b\x6e\xed\x46\x23\xdc\x65\xb2" + "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7" + "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8" + "\x65\x69\x8a\x45\x29\xef\x74\x85" + "\xde\x79\xc7\x08\xae\x30\xb0\xf4" + "\xa3\x1d\x51\x41\xab\xce\xcb\xf6" + "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3" + "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7" + "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9" + "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a" + "\x9f\xd7\xb9\x6c\x65\x14\x22\x45" + "\x6e\x45\x32\x3e\x7e\x60\x1a\x12" + "\x97\x82\x14\xfb\xaa\x04\x22\xfa" + "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d" + "\x78\x33\x5a\x7c\xad\xdb\x29\xce" + "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac" + "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21" + "\x83\x35\x7e\xad\x73\xc2\xb5\x6c" + "\x10\x26\x38\x07\xe5\xc7\x36\x80" + "\xe2\x23\x12\x61\xf5\x48\x4b\x2b" + "\xc5\xdf\x15\xd9\x87\x01\xaa\xac" + "\x1e\x7c\xad\x73\x78\x18\x63\xe0" + "\x8b\x9f\x81\xd8\x12\x6a\x28\x10" + "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c" + "\x83\x66\x80\x47\x80\xe8\xfd\x35" + "\x1c\x97\x6f\xae\x49\x10\x66\xcc" + "\xc6\xd8\xcc\x3a\x84\x91\x20\x77" + "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9" + "\x25\x94\x10\x5f\x40\x00\x64\x99" + 
"\xdc\xae\xd7\x21\x09\x78\x50\x15" + "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39" + "\x87\x6e\x6d\xab\xde\x08\x51\x16" + "\xc7\x13\xe9\xea\xed\x06\x8e\x2c" + "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43" + "\xb6\x98\x37\xb2\x43\xed\xde\xdf" + "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b" + "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9" + "\x80\x55\xc9\x34\x91\xd1\x59\xe8" + "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06" + "\x20\xa8\x5d\xfa\xd1\xde\x70\x56" + "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8" + "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5" + "\x44\x4b\x9f\xc2\x93\x03\xea\x2b" + "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23" + "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee" + "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab" + "\x82\x6b\x37\x04\xeb\x74\xbe\x79" + "\xb9\x83\x90\xef\x20\x59\x46\xff" + "\xe9\x97\x3e\x2f\xee\xb6\x64\x18" + "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a" + "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4" + "\xce\xd3\x91\x49\x88\xc7\xb8\x4d" + "\xb1\xb9\x07\x6d\x16\x72\xae\x46" + "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8" + "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62" + "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9" + "\x94\x97\xea\xdd\x58\x9e\xae\x76" + "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde" + "\xf7\x32\x87\xcd\x93\xbf\x11\x56" + "\x11\xbe\x08\x74\xe1\x69\xad\xe2" + "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe" + "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76" + "\x35\xea\x5d\x85\x81\xaf\x85\xeb" + "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc" + "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a" + "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9" + "\x37\x1c\xeb\x46\x54\x3f\xa5\x91" + "\xc2\xb5\x8c\xfe\x53\x08\x97\x32" + "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc" + "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1" + "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c" + "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd" + "\x20\x4e\x7c\x51\xb0\x60\x73\xb8" + "\x9c\xac\x91\x90\x7e\x01\xb0\xe1" + "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a" + "\x06\x52\x95\x52\xb2\xe9\x25\x2e" + "\x4c\xe2\x5a\x00\xb2\x13\x81\x03" + "\x77\x66\x0d\xa5\x99\xda\x4e\x8c" + "\xac\xf3\x13\x53\x27\x45\xaf\x64" + "\x46\xdc\xea\x23\xda\x97\xd1\xab" + "\x7d\x6c\x30\x96\x1f\xbc\x06\x34" + "\x18\x0b\x5e\x21\x35\x11\x8d\x4c" + "\xe0\x2d\xe9\x50\x16\x74\x81\xa8" + 
"\xb4\x34\xb9\x72\x42\xa6\xcc\xbc" + "\xca\x34\x83\x27\x10\x5b\x68\x45" + "\x8f\x52\x22\x0c\x55\x3d\x29\x7c" + "\xe3\xc0\x66\x05\x42\x91\x5f\x58" + "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19" + "\x04\xa9\x08\x4b\x57\xfc\x67\x53" + "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f" + "\x92\xd6\x41\x7c\x5b\x2a\x00\x79" + "\x72", + .ctext = "\x3a\x92\xee\x53\x31\xaf\x2b\x60" + "\x5f\x55\x8d\x00\x5d\xfc\x74\x97" + "\x28\x54\xf4\xa5\x75\xf1\x9b\x25" + "\x62\x1c\xc0\xe0\x13\xc8\x87\x53" + "\xd0\xf3\xa7\x97\x1f\x3b\x1e\xea" + "\xe0\xe5\x2a\xd1\xdd\xa4\x3b\x50" + "\x45\xa3\x0d\x7e\x1b\xc9\xa0\xad" + "\xb9\x2c\x54\xa6\xc7\x55\x16\xd0" + "\xc5\x2e\x02\x44\x35\xd0\x7e\x67" + "\xf2\xc4\x9b\xcd\x95\x10\xcc\x29" + "\x4b\xfa\x86\x87\xbe\x40\x36\xbe" + "\xe1\xa3\x52\x89\x55\x20\x9b\xc2" + "\xab\xf2\x31\x34\x16\xad\xc8\x17" + "\x65\x24\xc0\xff\x12\x37\xfe\x5a" + "\x62\x3b\x59\x47\x6c\x5f\x3a\x8e" + "\x3b\xd9\x30\xc8\x7f\x2f\x88\xda" + "\x80\xfd\x02\xda\x7f\x9a\x7a\x73" + "\x59\xc5\x34\x09\x9a\x11\xcb\xa7" + "\xfc\xf6\xa1\xa0\x60\xfb\x43\xbb" + "\xf1\xe9\xd7\xc6\x79\x27\x4e\xff" + "\x22\xb4\x24\xbf\x76\xee\x47\xb9" + "\x6d\x3f\x8b\xb0\x9c\x3c\x43\xdd" + "\xff\x25\x2e\x6d\xa4\x2b\xfb\x5d" + "\x1b\x97\x6c\x55\x0a\x82\x7a\x7b" + "\x94\x34\xc2\xdb\x2f\x1f\xc1\xea" + "\xd4\x4d\x17\x46\x3b\x51\x69\x09" + "\xe4\x99\x32\x25\xfd\x94\xaf\xfb" + "\x10\xf7\x4f\xdd\x0b\x3c\x8b\x41" + "\xb3\x6a\xb7\xd1\x33\xa8\x0c\x2f" + "\x62\x4c\x72\x11\xd7\x74\xe1\x3b" + "\x38\x43\x66\x7b\x6c\x36\x48\xe7" + "\xe3\xe7\x9d\xb9\x42\x73\x7a\x2a" + "\x89\x20\x1a\x41\x80\x03\xf7\x8f" + "\x61\x78\x13\xbf\xfe\x50\xf5\x04" + "\x52\xf9\xac\x47\xf8\x62\x4b\xb2" + "\x24\xa9\xbf\x64\xb0\x18\x69\xd2" + "\xf5\xe4\xce\xc8\xb1\x87\x75\xd6" + "\x2c\x24\x79\x00\x7d\x26\xfb\x44" + "\xe7\x45\x7a\xee\x58\xa5\x83\xc1" + "\xb4\x24\xab\x23\x2f\x4d\xd7\x4f" + "\x1c\xc7\xaa\xa9\x50\xf4\xa3\x07" + "\x12\x13\x89\x74\xdc\x31\x6a\xb2" + "\xf5\x0f\x13\x8b\xb9\xdb\x85\x1f" + "\xf5\xbc\x88\xd9\x95\xea\x31\x6c" + "\x36\x60\xb6\x49\xdc\xc4\xf7\x55" + 
"\x3f\x21\xc1\xb5\x92\x18\x5e\xbc" + "\x9f\x87\x7f\xe7\x79\x25\x40\x33" + "\xd6\xb9\x33\xd5\x50\xb3\xc7\x89" + "\x1b\x12\xa0\x46\xdd\xa7\xd8\x3e" + "\x71\xeb\x6f\x66\xa1\x26\x0c\x67" + "\xab\xb2\x38\x58\x17\xd8\x44\x3b" + "\x16\xf0\x8e\x62\x8d\x16\x10\x00" + "\x32\x8b\xef\xb9\x28\xd3\xc5\xad" + "\x0a\x19\xa2\xe4\x03\x27\x7d\x94" + "\x06\x18\xcd\xd6\x27\x00\xf9\x1f" + "\xb6\xb3\xfe\x96\x35\x5f\xc4\x1c" + "\x07\x62\x10\x79\x68\x50\xf1\x7e" + "\x29\xe7\xc4\xc4\xe7\xee\x54\xd6" + "\x58\x76\x84\x6d\x8d\xe4\x59\x31" + "\xe9\xf4\xdc\xa1\x1f\xe5\x1a\xd6" + "\xe6\x64\x46\xf5\x77\x9c\x60\x7a" + "\x5e\x62\xe3\x0a\xd4\x9f\x7a\x2d" + "\x7a\xa5\x0a\x7b\x29\x86\x7a\x74" + "\x74\x71\x6b\xca\x7d\x1d\xaa\xba" + "\x39\x84\x43\x76\x35\xfe\x4f\x9b" + "\xbb\xbb\xb5\x6a\x32\xb5\x5d\x41" + "\x51\xf0\x5b\x68\x03\x47\x4b\x8a" + "\xca\x88\xf6\x37\xbd\x73\x51\x70" + "\x66\xfe\x9e\x5f\x21\x9c\xf3\xdd" + "\xc3\xea\x27\xf9\x64\x94\xe1\x19" + "\xa0\xa9\xab\x60\xe0\x0e\xf7\x78" + "\x70\x86\xeb\xe0\xd1\x5c\x05\xd3" + "\xd7\xca\xe0\xc0\x47\x47\x34\xee" + "\x11\xa3\xa3\x54\x98\xb7\x49\x8e" + "\x84\x28\x70\x2c\x9e\xfb\x55\x54" + "\x4d\xf8\x86\xf7\x85\x7c\xbd\xf3" + "\x17\xd8\x47\xcb\xac\xf4\x20\x85" + "\x34\x66\xad\x37\x2d\x5e\x52\xda" + "\x8a\xfe\x98\x55\x30\xe7\x2d\x2b" + "\x19\x10\x8e\x7b\x66\x5e\xdc\xe0" + "\x45\x1f\x7b\xb4\x08\xfb\x8f\xf6" + "\x8c\x89\x21\x34\x55\x27\xb2\x76" + "\xb2\x07\xd9\xd6\x68\x9b\xea\x6b" + "\x2d\xb4\xc4\x35\xdd\xd2\x79\xae" + "\xc7\xd6\x26\x7f\x12\x01\x8c\xa7" + "\xe3\xdb\xa8\xf4\xf7\x2b\xec\x99" + "\x11\x00\xf1\x35\x8c\xcf\xd5\xc9" + "\xbd\x91\x36\x39\x70\xcf\x7d\x70" + "\x47\x1a\xfc\x6b\x56\xe0\x3f\x9c" + "\x60\x49\x01\x72\xa9\xaf\x2c\x9c" + "\xe8\xab\xda\x8c\x14\x19\xf3\x75" + "\x07\x17\x9d\x44\x67\x7a\x2e\xef" + "\xb7\x83\x35\x4a\xd1\x3d\x1c\x84" + "\x32\xdd\xaa\xea\xca\x1d\xdc\x72" + "\x2c\xcc\x43\xcd\x5d\xe3\x21\xa4" + "\xd0\x8a\x4b\x20\x12\xa3\xd5\x86" + "\x76\x96\xff\x5f\x04\x57\x0f\xe6" + "\xba\xe8\x76\x50\x0c\x64\x1d\x83" + "\x9c\x9b\x9a\x9a\x58\x97\x9c\x5c" + 
"\xb4\xa4\xa6\x3e\x19\xeb\x8f\x5a" + "\x61\xb2\x03\x7b\x35\x19\xbe\xa7" + "\x63\x0c\xfd\xdd\xf9\x90\x6c\x08" + "\x19\x11\xd3\x65\x4a\xf5\x96\x92" + "\x59\xaa\x9c\x61\x0c\x29\xa7\xf8" + "\x14\x39\x37\xbf\x3c\xf2\x16\x72" + "\x02\xfa\xa2\xf3\x18\x67\x5d\xcb" + "\xdc\x4d\xbb\x96\xff\x70\x08\x2d" + "\xc2\xa8\x52\xe1\x34\x5f\x72\xfe" + "\x64\xbf\xca\xa7\x74\x38\xfb\x74" + "\x55\x9c\xfa\x8a\xed\xfb\x98\xeb" + "\x58\x2e\x6c\xe1\x52\x76\x86\xd7" + "\xcf\xa1\xa4\xfc\xb2\x47\x41\x28" + "\xa3\xc1\xe5\xfd\x53\x19\x28\x2b" + "\x37\x04\x65\x96\x99\x7a\x28\x0f" + "\x07\x68\x4b\xc7\x52\x0a\x55\x35" + "\x40\x19\x95\x61\xe8\x59\x40\x1f" + "\x9d\xbf\x78\x7d\x8f\x84\xff\x6f" + "\xd0\xd5\x63\xd2\x22\xbd\xc8\x4e" + "\xfb\xe7\x9f\x06\xe6\xe7\x39\x6d" + "\x6a\x96\x9f\xf0\x74\x7e\xc9\x35" + "\xb7\x26\xb8\x1c\x0a\xa6\x27\x2c" + "\xa2\x2b\xfe\xbe\x0f\x07\x73\xae" + "\x7f\x7f\x54\xf5\x7c\x6a\x0a\x56" + "\x49\xd4\x81\xe5\x85\x53\x99\x1f" + "\x95\x05\x13\x58\x8d\x0e\x1b\x90" + "\xc3\x75\x48\x64\x58\x98\x67\x84" + "\xae\xe2\x21\xa2\x8a\x04\x0a\x0b" + "\x61\xaa\xb0\xd4\x28\x60\x7a\xf8" + "\xbc\x52\xfb\x24\x7f\xed\x0d\x2a" + "\x0a\xb2\xf9\xc6\x95\xb5\x11\xc9" + "\xf4\x0f\x26\x11\xcf\x2a\x57\x87" + "\x7a\xf3\xe7\x94\x65\xc2\xb5\xb3" + "\xab\x98\xe3\xc1\x2b\x59\x19\x7c" + "\xd6\xf3\xf9\xbf\xff\x6d\xc6\x82" + "\x13\x2f\x4a\x2e\xcd\x26\xfe\x2d" + "\x01\x70\xf4\xc2\x7f\x1f\x4c\xcb" + "\x47\x77\x0c\xa0\xa3\x03\xec\xda" + "\xa9\xbf\x0d\x2d\xae\xe4\xb8\x7b" + "\xa9\xbc\x08\xb4\x68\x2e\xc5\x60" + "\x8d\x87\x41\x2b\x0f\x69\xf0\xaf" + "\x5f\xba\x72\x20\x0f\x33\xcd\x6d" + "\x36\x7d\x7b\xd5\x05\xf1\x4b\x05" + "\xc4\xfc\x7f\x80\xb9\x4d\xbd\xf7" + "\x7c\x84\x07\x01\xc2\x40\x66\x5b" + "\x98\xc7\x2c\xe3\x97\xfa\xdf\x87" + "\xa0\x1f\xe9\x21\x42\x0f\x3b\xeb" + "\x89\x1c\x3b\xca\x83\x61\x77\x68" + "\x84\xbb\x60\x87\x38\x2e\x25\xd5" + "\x9e\x04\x41\x70\xac\xda\xc0\x9c" + "\x9c\x69\xea\x8d\x4e\x55\x2a\x29" + "\xed\x05\x4b\x7b\x73\x71\x90\x59" + "\x4d\xc8\xd8\x44\xf0\x4c\xe1\x5e" + "\x84\x47\x55\xcc\x32\x3f\xe7\x97" + 
"\x42\xc6\x32\xac\x40\xe5\xa5\xc7" + "\x8b\xed\xdb\xf7\x83\xd6\xb1\xc2" + "\x52\x5e\x34\xb7\xeb\x6e\xd9\xfc" + "\xe5\x93\x9a\x97\x3e\xb0\xdc\xd9" + "\xd7\x06\x10\xb6\x1d\x80\x59\xdd" + "\x0d\xfe\x64\x35\xcd\x5d\xec\xf0" + "\xba\xd0\x34\xc9\x2d\x91\xc5\x17" + "\x11", + .len = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, + }, { /* test vector from https://tools.ietf.org/html/draft-arciszewski-xchacha-02#appendix-A.3.2 */ + .key = "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f", + .klen = 32, + .iv = "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x58" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x54\x68\x65\x20\x64\x68\x6f\x6c" + "\x65\x20\x28\x70\x72\x6f\x6e\x6f" + "\x75\x6e\x63\x65\x64\x20\x22\x64" + "\x6f\x6c\x65\x22\x29\x20\x69\x73" + "\x20\x61\x6c\x73\x6f\x20\x6b\x6e" + "\x6f\x77\x6e\x20\x61\x73\x20\x74" + "\x68\x65\x20\x41\x73\x69\x61\x74" + "\x69\x63\x20\x77\x69\x6c\x64\x20" + "\x64\x6f\x67\x2c\x20\x72\x65\x64" + "\x20\x64\x6f\x67\x2c\x20\x61\x6e" + "\x64\x20\x77\x68\x69\x73\x74\x6c" + "\x69\x6e\x67\x20\x64\x6f\x67\x2e" + "\x20\x49\x74\x20\x69\x73\x20\x61" + "\x62\x6f\x75\x74\x20\x74\x68\x65" + "\x20\x73\x69\x7a\x65\x20\x6f\x66" + "\x20\x61\x20\x47\x65\x72\x6d\x61" + "\x6e\x20\x73\x68\x65\x70\x68\x65" + "\x72\x64\x20\x62\x75\x74\x20\x6c" + "\x6f\x6f\x6b\x73\x20\x6d\x6f\x72" + "\x65\x20\x6c\x69\x6b\x65\x20\x61" + "\x20\x6c\x6f\x6e\x67\x2d\x6c\x65" + "\x67\x67\x65\x64\x20\x66\x6f\x78" + "\x2e\x20\x54\x68\x69\x73\x20\x68" + "\x69\x67\x68\x6c\x79\x20\x65\x6c" + "\x75\x73\x69\x76\x65\x20\x61\x6e" + "\x64\x20\x73\x6b\x69\x6c\x6c\x65" + "\x64\x20\x6a\x75\x6d\x70\x65\x72" + "\x20\x69\x73\x20\x63\x6c\x61\x73" + "\x73\x69\x66\x69\x65\x64\x20\x77" + "\x69\x74\x68\x20\x77\x6f\x6c\x76" + "\x65\x73\x2c\x20\x63\x6f\x79\x6f" + "\x74\x65\x73\x2c\x20\x6a\x61\x63" + "\x6b\x61\x6c\x73\x2c\x20\x61\x6e" + 
"\x64\x20\x66\x6f\x78\x65\x73\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x74" + "\x61\x78\x6f\x6e\x6f\x6d\x69\x63" + "\x20\x66\x61\x6d\x69\x6c\x79\x20" + "\x43\x61\x6e\x69\x64\x61\x65\x2e", + .ctext = "\x45\x59\xab\xba\x4e\x48\xc1\x61" + "\x02\xe8\xbb\x2c\x05\xe6\x94\x7f" + "\x50\xa7\x86\xde\x16\x2f\x9b\x0b" + "\x7e\x59\x2a\x9b\x53\xd0\xd4\xe9" + "\x8d\x8d\x64\x10\xd5\x40\xa1\xa6" + "\x37\x5b\x26\xd8\x0d\xac\xe4\xfa" + "\xb5\x23\x84\xc7\x31\xac\xbf\x16" + "\xa5\x92\x3c\x0c\x48\xd3\x57\x5d" + "\x4d\x0d\x2c\x67\x3b\x66\x6f\xaa" + "\x73\x10\x61\x27\x77\x01\x09\x3a" + "\x6b\xf7\xa1\x58\xa8\x86\x42\x92" + "\xa4\x1c\x48\xe3\xa9\xb4\xc0\xda" + "\xec\xe0\xf8\xd9\x8d\x0d\x7e\x05" + "\xb3\x7a\x30\x7b\xbb\x66\x33\x31" + "\x64\xec\x9e\x1b\x24\xea\x0d\x6c" + "\x3f\xfd\xdc\xec\x4f\x68\xe7\x44" + "\x30\x56\x19\x3a\x03\xc8\x10\xe1" + "\x13\x44\xca\x06\xd8\xed\x8a\x2b" + "\xfb\x1e\x8d\x48\xcf\xa6\xbc\x0e" + "\xb4\xe2\x46\x4b\x74\x81\x42\x40" + "\x7c\x9f\x43\x1a\xee\x76\x99\x60" + "\xe1\x5b\xa8\xb9\x68\x90\x46\x6e" + "\xf2\x45\x75\x99\x85\x23\x85\xc6" + "\x61\xf7\x52\xce\x20\xf9\xda\x0c" + "\x09\xab\x6b\x19\xdf\x74\xe7\x6a" + "\x95\x96\x74\x46\xf8\xd0\xfd\x41" + "\x5e\x7b\xee\x2a\x12\xa1\x14\xc2" + "\x0e\xb5\x29\x2a\xe7\xa3\x49\xae" + "\x57\x78\x20\xd5\x52\x0a\x1f\x3f" + "\xb6\x2a\x17\xce\x6a\x7e\x68\xfa" + "\x7c\x79\x11\x1d\x88\x60\x92\x0b" + "\xc0\x48\xef\x43\xfe\x84\x48\x6c" + "\xcb\x87\xc2\x5f\x0a\xe0\x45\xf0" + "\xcc\xe1\xe7\x98\x9a\x9a\xa2\x20" + "\xa2\x8b\xdd\x48\x27\xe7\x51\xa2" + "\x4a\x6d\x5c\x62\xd7\x90\xa6\x63" + "\x93\xb9\x31\x11\xc1\xa5\x5d\xd7" + "\x42\x1a\x10\x18\x49\x74\xc7\xc5", + .len = 304, + } +}; + +/* + * Same as XChaCha20 test vectors above, but recomputed the ciphertext with + * XChaCha12, using a modified libsodium. 
+ */ +static const struct cipher_testvec xchacha12_tv_template[] = { + { + .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b" + "\xbb\x27\x04\xc9\x5c\x34\x1e\x32" + "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e" + "\x52\xff\x45\xb2\x4f\x30\x4f\xc4", + .klen = 32, + .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf" + "\xbc\x9a\xee\x49\x41\x76\x88\xa0" + "\xa2\x55\x4f\x8d\x95\x38\x94\x19" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00", + .ctext = "\x1b\x78\x7f\xd7\xa1\x41\x68\xab" + "\x3d\x3f\xd1\x7b\x69\x56\xb2\xd5" + "\x43\xce\xeb\xaf\x36\xf0\x29\x9d" + "\x3a\xfb\x18\xae\x1b", + .len = 29, + }, { + .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c" + "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98" + "\x08\xcb\x0e\x50\xcd\x0f\x67\x81" + "\x22\x35\xea\xaf\x60\x1d\x62\x32", + .klen = 32, + .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70" + "\xd3\x35\x66\xa2\x42\x5c\xbf\x30" + "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00", + .ctext = "\xfb\x32\x09\x1d\x83\x05\xae\x4c" + "\x13\x1f\x12\x71\xf2\xca\xb2\xeb" + "\x5b\x83\x14\x7d\x83\xf6\x57\x77" + "\x2e\x40\x1f\x92\x2c\xf9\xec\x35" + "\x34\x1f\x93\xdf\xfb\x30\xd7\x35" + "\x03\x05\x78\xc1\x20\x3b\x7a\xe3" + "\x62\xa3\x89\xdc\x11\x11\x45\xa8" + "\x82\x89\xa0\xf1\x4e\xc7\x0f\x11" + "\x69\xdd\x0c\x84\x2b\x89\x5c\xdc" + "\xf0\xde\x01\xef\xc5\x65\x79\x23" + "\x87\x67\xd6\x50\xd9\x8d\xd9\x92" + "\x54\x5b\x0e", + .len = 91, + }, { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + 
"\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x67\xc6\x69\x73" + "\x51\xff\x4a\xec\x29\xcd\xba\xab" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ctext = "\xdf\x2d\xc6\x21\x2a\x9d\xa1\xbb" + "\xc2\x77\x66\x0c\x5c\x46\xef\xa7" + "\x79\x1b\xb9\xdf\x55\xe2\xf9\x61" + "\x4c\x7b\xa4\x52\x24\xaf\xa2\xda" + "\xd1\x8f\x8f\xa2\x9e\x53\x4d\xc4" + "\xb8\x55\x98\x08\x7c\x08\xd4\x18" + "\x67\x8f\xef\x50\xb1\x5f\xa5\x77" + "\x4c\x25\xe7\x86\x26\x42\xca\x44", + .len = 64, + }, { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\xf2\xfb\xe3\x46" + "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d" + "\x01\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x41\x6e\x79\x20\x73\x75\x62\x6d" + "\x69\x73\x73\x69\x6f\x6e\x20\x74" + "\x6f\x20\x74\x68\x65\x20\x49\x45" + "\x54\x46\x20\x69\x6e\x74\x65\x6e" + "\x64\x65\x64\x20\x62\x79\x20\x74" + "\x68\x65\x20\x43\x6f\x6e\x74\x72" + "\x69\x62\x75\x74\x6f\x72\x20\x66" + "\x6f\x72\x20\x70\x75\x62\x6c\x69" + "\x63\x61\x74\x69\x6f\x6e\x20\x61" + "\x73\x20\x61\x6c\x6c\x20\x6f\x72" + "\x20\x70\x61\x72\x74\x20\x6f\x66" + "\x20\x61\x6e\x20\x49\x45\x54\x46" + "\x20\x49\x6e\x74\x65\x72\x6e\x65" + "\x74\x2d\x44\x72\x61\x66\x74\x20" + "\x6f\x72\x20\x52\x46\x43\x20\x61" + "\x6e\x64\x20\x61\x6e\x79\x20\x73" + "\x74\x61\x74\x65\x6d\x65\x6e\x74" + "\x20\x6d\x61\x64\x65\x20\x77\x69" + "\x74\x68\x69\x6e\x20\x74\x68\x65" + "\x20\x63\x6f\x6e\x74\x65\x78\x74" + 
"\x20\x6f\x66\x20\x61\x6e\x20\x49" + "\x45\x54\x46\x20\x61\x63\x74\x69" + "\x76\x69\x74\x79\x20\x69\x73\x20" + "\x63\x6f\x6e\x73\x69\x64\x65\x72" + "\x65\x64\x20\x61\x6e\x20\x22\x49" + "\x45\x54\x46\x20\x43\x6f\x6e\x74" + "\x72\x69\x62\x75\x74\x69\x6f\x6e" + "\x22\x2e\x20\x53\x75\x63\x68\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x63\x6c\x75" + "\x64\x65\x20\x6f\x72\x61\x6c\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x20\x49\x45" + "\x54\x46\x20\x73\x65\x73\x73\x69" + "\x6f\x6e\x73\x2c\x20\x61\x73\x20" + "\x77\x65\x6c\x6c\x20\x61\x73\x20" + "\x77\x72\x69\x74\x74\x65\x6e\x20" + "\x61\x6e\x64\x20\x65\x6c\x65\x63" + "\x74\x72\x6f\x6e\x69\x63\x20\x63" + "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" + "\x74\x69\x6f\x6e\x73\x20\x6d\x61" + "\x64\x65\x20\x61\x74\x20\x61\x6e" + "\x79\x20\x74\x69\x6d\x65\x20\x6f" + "\x72\x20\x70\x6c\x61\x63\x65\x2c" + "\x20\x77\x68\x69\x63\x68\x20\x61" + "\x72\x65\x20\x61\x64\x64\x72\x65" + "\x73\x73\x65\x64\x20\x74\x6f", + .ctext = "\xe4\xa6\xc8\x30\xc4\x23\x13\xd6" + "\x08\x4d\xc9\xb7\xa5\x64\x7c\xb9" + "\x71\xe2\xab\x3e\xa8\x30\x8a\x1c" + "\x4a\x94\x6d\x9b\xe0\xb3\x6f\xf1" + "\xdc\xe3\x1b\xb3\xa9\x6d\x0d\xd6" + "\xd0\xca\x12\xef\xe7\x5f\xd8\x61" + "\x3c\x82\xd3\x99\x86\x3c\x6f\x66" + "\x02\x06\xdc\x55\xf9\xed\xdf\x38" + "\xb4\xa6\x17\x00\x7f\xef\xbf\x4f" + "\xf8\x36\xf1\x60\x7e\x47\xaf\xdb" + "\x55\x9b\x12\xcb\x56\x44\xa7\x1f" + "\xd3\x1a\x07\x3b\x00\xec\xe6\x4c" + "\xa2\x43\x27\xdf\x86\x19\x4f\x16" + "\xed\xf9\x4a\xf3\x63\x6f\xfa\x7f" + "\x78\x11\xf6\x7d\x97\x6f\xec\x6f" + "\x85\x0f\x5c\x36\x13\x8d\x87\xe0" + "\x80\xb1\x69\x0b\x98\x89\x9c\x4e" + "\xf8\xdd\xee\x5c\x0a\x85\xce\xd4" + "\xea\x1b\x48\xbe\x08\xf8\xe2\xa8" + "\xa5\xb0\x3c\x79\xb1\x15\xb4\xb9" + "\x75\x10\x95\x35\x81\x7e\x26\xe6" + "\x78\xa4\x88\xcf\xdb\x91\x34\x18" + "\xad\xd7\x8e\x07\x7d\xab\x39\xf9" + "\xa3\x9e\xa5\x1d\xbb\xed\x61\xfd" + "\xdc\xb7\x5a\x27\xfc\xb5\xc9\x10" + "\xa8\xcc\x52\x7f\x14\x76\x90\xe7" + 
"\x1b\x29\x60\x74\xc0\x98\x77\xbb" + "\xe0\x54\xbb\x27\x49\x59\x1e\x62" + "\x3d\xaf\x74\x06\xa4\x42\x6f\xc6" + "\x52\x97\xc4\x1d\xc4\x9f\xe2\xe5" + "\x38\x57\x91\xd1\xa2\x28\xcc\x40" + "\xcc\x70\x59\x37\xfc\x9f\x4b\xda" + "\xa0\xeb\x97\x9a\x7d\xed\x14\x5c" + "\x9c\xb7\x93\x26\x41\xa8\x66\xdd" + "\x87\x6a\xc0\xd3\xc2\xa9\x3e\xae" + "\xe9\x72\xfe\xd1\xb3\xac\x38\xea" + "\x4d\x15\xa9\xd5\x36\x61\xe9\x96" + "\x6c\x23\xf8\x43\xe4\x92\x29\xd9" + "\x8b\x78\xf7\x0a\x52\xe0\x19\x5b" + "\x59\x69\x5b\x5d\xa1\x53\xc4\x68" + "\xe1\xbb\xac\x89\x14\xe2\xe2\x85" + "\x41\x18\xf5\xb3\xd1\xfa\x68\x19" + "\x44\x78\xdc\xcf\xe7\x88\x2d\x52" + "\x5f\x40\xb5\x7e\xf8\x88\xa2\xae" + "\x4a\xb2\x07\x35\x9d\x9b\x07\x88" + "\xb7\x00\xd0\x0c\xb6\xa0\x47\x59" + "\xda\x4e\xc9\xab\x9b\x8a\x7b", + + .len = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + + }, { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\x76\x5a\x2e\x63" + "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7" + "\x2a\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x27\x54\x77\x61\x73\x20\x62\x72" + "\x69\x6c\x6c\x69\x67\x2c\x20\x61" + "\x6e\x64\x20\x74\x68\x65\x20\x73" + "\x6c\x69\x74\x68\x79\x20\x74\x6f" + "\x76\x65\x73\x0a\x44\x69\x64\x20" + "\x67\x79\x72\x65\x20\x61\x6e\x64" + "\x20\x67\x69\x6d\x62\x6c\x65\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x77" + "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" + "\x20\x6d\x69\x6d\x73\x79\x20\x77" + "\x65\x72\x65\x20\x74\x68\x65\x20" + "\x62\x6f\x72\x6f\x67\x6f\x76\x65" + "\x73\x2c\x0a\x41\x6e\x64\x20\x74" + "\x68\x65\x20\x6d\x6f\x6d\x65\x20" + "\x72\x61\x74\x68\x73\x20\x6f\x75" + "\x74\x67\x72\x61\x62\x65\x2e", + .ctext = "\xb9\x68\xbc\x6a\x24\xbc\xcc\xd8" + "\x9b\x2a\x8d\x5b\x96\xaf\x56\xe3" + "\x11\x61\xe7\xa7\x9b\xce\x4e\x7d" + "\x60\x02\x48\xac\xeb\xd5\x3a\x26" + "\x9d\x77\x3b\xb5\x32\x13\x86\x8e" + 
"\x20\x82\x26\x72\xae\x64\x1b\x7e" + "\x2e\x01\x68\xb4\x87\x45\xa1\x24" + "\xe4\x48\x40\xf0\xaa\xac\xee\xa9" + "\xfc\x31\xad\x9d\x89\xa3\xbb\xd2" + "\xe4\x25\x13\xad\x0f\x5e\xdf\x3c" + "\x27\xab\xb8\x62\x46\x22\x30\x48" + "\x55\x2c\x4e\x84\x78\x1d\x0d\x34" + "\x8d\x3c\x91\x0a\x7f\x5b\x19\x9f" + "\x97\x05\x4c\xa7\x62\x47\x8b\xc5" + "\x44\x2e\x20\x33\xdd\xa0\x82\xa9" + "\x25\x76\x37\xe6\x3c\x67\x5b", + .len = 127, + }, { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x01\x31\x58\xa3\x5a" + "\x25\x5d\x05\x17\x58\xe9\x5e\xd4" + "\x1c\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x49\xee\xe0\xdc\x24\x90\x40\xcd" + "\xc5\x40\x8f\x47\x05\xbc\xdd\x81" + "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb" + "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8" + "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4" + "\x19\x4b\x01\x0f\x4e\xa4\x43\xce" + "\x01\xc6\x67\xda\x03\x91\x18\x90" + "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac" + "\x74\x92\xd3\x53\x47\xc8\xdd\x25" + "\x53\x6c\x02\x03\x87\x0d\x11\x0c" + "\x58\xe3\x12\x18\xfd\x2a\x5b\x40" + "\x0c\x30\xf0\xb8\x3f\x43\xce\xae" + "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc" + "\x33\x97\xc3\x77\xba\xc5\x70\xde" + "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f" + "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c" + "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c" + "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe" + "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6" + "\x79\x49\x41\xf4\x58\x18\xcb\x86" + "\x7f\x30\x0e\xf8\x7d\x44\x36\xea" + "\x75\xeb\x88\x84\x40\x3c\xad\x4f" + "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5" + "\x21\x66\xe9\xa7\xe3\xb2\x15\x88" + "\x78\xf6\x79\xa1\x59\x47\x12\x4e" + "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08" + "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b" + "\xdd\x60\x71\xf7\x47\x8c\x61\xc3" + "\xda\x8a\x78\x1e\x16\xfa\x1e\x86" + "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7" + "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4" + "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6" + "\xf4\xe6\x33\x43\x84\x93\xa5\x67" + 
"\x9b\x16\x58\x58\x80\x0f\x2b\x5c" + "\x24\x74\x75\x7f\x95\x81\xb7\x30" + "\x7a\x33\xa7\xf7\x94\x87\x32\x27" + "\x10\x5d\x14\x4c\x43\x29\xdd\x26" + "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10" + "\xea\x6b\x64\xfd\x73\xc6\xed\xec" + "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07" + "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5" + "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1" + "\xec\xca\x60\x09\x4c\x6a\xd5\x09" + "\x49\x46\x00\x88\x22\x8d\xce\xea" + "\xb1\x17\x11\xde\x42\xd2\x23\xc1" + "\x72\x11\xf5\x50\x73\x04\x40\x47" + "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0" + "\x3f\x58\xc1\x52\xab\x12\x67\x9d" + "\x3f\x43\x4b\x68\xd4\x9c\x68\x38" + "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b" + "\xf9\xe5\x31\x69\x22\xf9\xa6\x69" + "\xc6\x9c\x96\x9a\x12\x35\x95\x1d" + "\x95\xd5\xdd\xbe\xbf\x93\x53\x24" + "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00" + "\x6f\x88\xc4\x37\x18\x69\x7c\xd7" + "\x41\x92\x55\x4c\x03\xa1\x9a\x4b" + "\x15\xe5\xdf\x7f\x37\x33\x72\xc1" + "\x8b\x10\x67\xa3\x01\x57\x94\x25" + "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73" + "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda" + "\x58\xb1\x47\x90\xfe\x42\x21\x72" + "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd" + "\xc6\x84\x6e\xca\xae\xe3\x68\xb4" + "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b" + "\x03\xa1\x31\xd9\xde\x8d\xf5\x22" + "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76" + "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe" + "\x54\xf7\x27\x1b\xf4\xde\x02\xf5" + "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e" + "\x4b\x6e\xed\x46\x23\xdc\x65\xb2" + "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7" + "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8" + "\x65\x69\x8a\x45\x29\xef\x74\x85" + "\xde\x79\xc7\x08\xae\x30\xb0\xf4" + "\xa3\x1d\x51\x41\xab\xce\xcb\xf6" + "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3" + "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7" + "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9" + "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a" + "\x9f\xd7\xb9\x6c\x65\x14\x22\x45" + "\x6e\x45\x32\x3e\x7e\x60\x1a\x12" + "\x97\x82\x14\xfb\xaa\x04\x22\xfa" + "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d" + "\x78\x33\x5a\x7c\xad\xdb\x29\xce" + "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac" + "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21" + "\x83\x35\x7e\xad\x73\xc2\xb5\x6c" + 
"\x10\x26\x38\x07\xe5\xc7\x36\x80" + "\xe2\x23\x12\x61\xf5\x48\x4b\x2b" + "\xc5\xdf\x15\xd9\x87\x01\xaa\xac" + "\x1e\x7c\xad\x73\x78\x18\x63\xe0" + "\x8b\x9f\x81\xd8\x12\x6a\x28\x10" + "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c" + "\x83\x66\x80\x47\x80\xe8\xfd\x35" + "\x1c\x97\x6f\xae\x49\x10\x66\xcc" + "\xc6\xd8\xcc\x3a\x84\x91\x20\x77" + "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9" + "\x25\x94\x10\x5f\x40\x00\x64\x99" + "\xdc\xae\xd7\x21\x09\x78\x50\x15" + "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39" + "\x87\x6e\x6d\xab\xde\x08\x51\x16" + "\xc7\x13\xe9\xea\xed\x06\x8e\x2c" + "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43" + "\xb6\x98\x37\xb2\x43\xed\xde\xdf" + "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b" + "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9" + "\x80\x55\xc9\x34\x91\xd1\x59\xe8" + "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06" + "\x20\xa8\x5d\xfa\xd1\xde\x70\x56" + "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8" + "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5" + "\x44\x4b\x9f\xc2\x93\x03\xea\x2b" + "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23" + "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee" + "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab" + "\x82\x6b\x37\x04\xeb\x74\xbe\x79" + "\xb9\x83\x90\xef\x20\x59\x46\xff" + "\xe9\x97\x3e\x2f\xee\xb6\x64\x18" + "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a" + "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4" + "\xce\xd3\x91\x49\x88\xc7\xb8\x4d" + "\xb1\xb9\x07\x6d\x16\x72\xae\x46" + "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8" + "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62" + "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9" + "\x94\x97\xea\xdd\x58\x9e\xae\x76" + "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde" + "\xf7\x32\x87\xcd\x93\xbf\x11\x56" + "\x11\xbe\x08\x74\xe1\x69\xad\xe2" + "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe" + "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76" + "\x35\xea\x5d\x85\x81\xaf\x85\xeb" + "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc" + "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a" + "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9" + "\x37\x1c\xeb\x46\x54\x3f\xa5\x91" + "\xc2\xb5\x8c\xfe\x53\x08\x97\x32" + "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc" + "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1" + "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c" + "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd" + 
"\x20\x4e\x7c\x51\xb0\x60\x73\xb8" + "\x9c\xac\x91\x90\x7e\x01\xb0\xe1" + "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a" + "\x06\x52\x95\x52\xb2\xe9\x25\x2e" + "\x4c\xe2\x5a\x00\xb2\x13\x81\x03" + "\x77\x66\x0d\xa5\x99\xda\x4e\x8c" + "\xac\xf3\x13\x53\x27\x45\xaf\x64" + "\x46\xdc\xea\x23\xda\x97\xd1\xab" + "\x7d\x6c\x30\x96\x1f\xbc\x06\x34" + "\x18\x0b\x5e\x21\x35\x11\x8d\x4c" + "\xe0\x2d\xe9\x50\x16\x74\x81\xa8" + "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc" + "\xca\x34\x83\x27\x10\x5b\x68\x45" + "\x8f\x52\x22\x0c\x55\x3d\x29\x7c" + "\xe3\xc0\x66\x05\x42\x91\x5f\x58" + "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19" + "\x04\xa9\x08\x4b\x57\xfc\x67\x53" + "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f" + "\x92\xd6\x41\x7c\x5b\x2a\x00\x79" + "\x72", + .ctext = "\xe1\xb6\x8b\x5c\x80\xb8\xcc\x08" + "\x1b\x84\xb2\xd1\xad\xa4\x70\xac" + "\x67\xa9\x39\x27\xac\xb4\x5b\xb7" + "\x4c\x26\x77\x23\x1d\xce\x0a\xbe" + "\x18\x9e\x42\x8b\xbd\x7f\xd6\xf1" + "\xf1\x6b\xe2\x6d\x7f\x92\x0e\xcb" + "\xb8\x79\xba\xb4\xac\x7e\x2d\xc0" + "\x9e\x83\x81\x91\xd5\xea\xc3\x12" + "\x8d\xa4\x26\x70\xa4\xf9\x71\x0b" + "\xbd\x2e\xe1\xb3\x80\x42\x25\xb3" + "\x0b\x31\x99\xe1\x0d\xde\xa6\x90" + "\xf2\xa3\x10\xf7\xe5\xf3\x83\x1e" + "\x2c\xfb\x4d\xf0\x45\x3d\x28\x3c" + "\xb8\xf1\xcb\xbf\x67\xd8\x43\x5a" + "\x9d\x7b\x73\x29\x88\x0f\x13\x06" + "\x37\x50\x0d\x7c\xe6\x9b\x07\xdd" + "\x7e\x01\x1f\x81\x90\x10\x69\xdb" + "\xa4\xad\x8a\x5e\xac\x30\x72\xf2" + "\x36\xcd\xe3\x23\x49\x02\x93\xfa" + "\x3d\xbb\xe2\x98\x83\xeb\xe9\x8d" + "\xb3\x8f\x11\xaa\x53\xdb\xaf\x2e" + "\x95\x13\x99\x3d\x71\xbd\x32\x92" + "\xdd\xfc\x9d\x5e\x6f\x63\x2c\xee" + "\x91\x1f\x4c\x64\x3d\x87\x55\x0f" + "\xcc\x3d\x89\x61\x53\x02\x57\x8f" + "\xe4\x77\x29\x32\xaf\xa6\x2f\x0a" + "\xae\x3c\x3f\x3f\xf4\xfb\x65\x52" + "\xc5\xc1\x78\x78\x53\x28\xad\xed" + "\xd1\x67\x37\xc7\x59\x70\xcd\x0a" + "\xb8\x0f\x80\x51\x9f\xc0\x12\x5e" + "\x06\x0a\x7e\xec\x24\x5f\x73\x00" + "\xb1\x0b\x31\x47\x4f\x73\x8d\xb4" + "\xce\xf3\x55\x45\x6c\x84\x27\xba" + "\xb9\x6f\x03\x4a\xeb\x98\x88\x6e" + 
"\x53\xed\x25\x19\x0d\x8f\xfe\xca" + "\x60\xe5\x00\x93\x6e\x3c\xff\x19" + "\xae\x08\x3b\x8a\xa6\x84\x05\xfe" + "\x9b\x59\xa0\x8c\xc8\x05\x45\xf5" + "\x05\x37\xdc\x45\x6f\x8b\x95\x8c" + "\x4e\x11\x45\x7a\xce\x21\xa5\xf7" + "\x71\x67\xb9\xce\xd7\xf9\xe9\x5e" + "\x60\xf5\x53\x7a\xa8\x85\x14\x03" + "\xa0\x92\xec\xf3\x51\x80\x84\xc4" + "\xdc\x11\x9e\x57\xce\x4b\x45\xcf" + "\x90\x95\x85\x0b\x96\xe9\xee\x35" + "\x10\xb8\x9b\xf2\x59\x4a\xc6\x7e" + "\x85\xe5\x6f\x38\x51\x93\x40\x0c" + "\x99\xd7\x7f\x32\xa8\x06\x27\xd1" + "\x2b\xd5\xb5\x3a\x1a\xe1\x5e\xda" + "\xcd\x5a\x50\x30\x3c\xc7\xe7\x65" + "\xa6\x07\x0b\x98\x91\xc6\x20\x27" + "\x2a\x03\x63\x1b\x1e\x3d\xaf\xc8" + "\x71\x48\x46\x6a\x64\x28\xf9\x3d" + "\xd1\x1d\xab\xc8\x40\x76\xc2\x39" + "\x4e\x00\x75\xd2\x0e\x82\x58\x8c" + "\xd3\x73\x5a\xea\x46\x89\xbe\xfd" + "\x4e\x2c\x0d\x94\xaa\x9b\x68\xac" + "\x86\x87\x30\x7e\xa9\x16\xcd\x59" + "\xd2\xa6\xbe\x0a\xd8\xf5\xfd\x2d" + "\x49\x69\xd2\x1a\x90\xd2\x1b\xed" + "\xff\x71\x04\x87\x87\x21\xc4\xb8" + "\x1f\x5b\x51\x33\xd0\xd6\x59\x9a" + "\x03\x0e\xd3\x8b\xfb\x57\x73\xfd" + "\x5a\x52\x63\x82\xc8\x85\x2f\xcb" + "\x74\x6d\x4e\xd9\x68\x37\x85\x6a" + "\xd4\xfb\x94\xed\x8d\xd1\x1a\xaf" + "\x76\xa7\xb7\x88\xd0\x2b\x4e\xda" + "\xec\x99\x94\x27\x6f\x87\x8c\xdf" + "\x4b\x5e\xa6\x66\xdd\xcb\x33\x7b" + "\x64\x94\x31\xa8\x37\xa6\x1d\xdb" + "\x0d\x5c\x93\xa4\x40\xf9\x30\x53" + "\x4b\x74\x8d\xdd\xf6\xde\x3c\xac" + "\x5c\x80\x01\x3a\xef\xb1\x9a\x02" + "\x0c\x22\x8e\xe7\x44\x09\x74\x4c" + "\xf2\x9a\x27\x69\x7f\x12\x32\x36" + "\xde\x92\xdf\xde\x8f\x5b\x31\xab" + "\x4a\x01\x26\xe0\xb1\xda\xe8\x37" + "\x21\x64\xe8\xff\x69\xfc\x9e\x41" + "\xd2\x96\x2d\x18\x64\x98\x33\x78" + "\x24\x61\x73\x9b\x47\x29\xf1\xa7" + "\xcb\x27\x0f\xf0\x85\x6d\x8c\x9d" + "\x2c\x95\x9e\xe5\xb2\x8e\x30\x29" + "\x78\x8a\x9d\x65\xb4\x8e\xde\x7b" + "\xd9\x00\x50\xf5\x7f\x81\xc3\x1b" + "\x25\x85\xeb\xc2\x8c\x33\x22\x1e" + "\x68\x38\x22\x30\xd8\x2e\x00\x98" + "\x85\x16\x06\x56\xb4\x81\x74\x20" + "\x95\xdb\x1c\x05\x19\xe8\x23\x4d" + 
"\x65\x5d\xcc\xd8\x7f\xc4\x2d\x0f" + "\x57\x26\x71\x07\xad\xaa\x71\x9f" + "\x19\x76\x2f\x25\x51\x88\xe4\xc0" + "\x82\x6e\x08\x05\x37\x04\xee\x25" + "\x23\x90\xe9\x4e\xce\x9b\x16\xc1" + "\x31\xe7\x6e\x2c\x1b\xe1\x85\x9a" + "\x0c\x8c\xbb\x12\x1e\x68\x7b\x93" + "\xa9\x3c\x39\x56\x23\x3e\x6e\xc7" + "\x77\x84\xd3\xe0\x86\x59\xaa\xb9" + "\xd5\x53\x58\xc9\x0a\x83\x5f\x85" + "\xd8\x47\x14\x67\x8a\x3c\x17\xe0" + "\xab\x02\x51\xea\xf1\xf0\x4f\x30" + "\x7d\xe0\x92\xc2\x5f\xfb\x19\x5a" + "\x3f\xbd\xf4\x39\xa4\x31\x0c\x39" + "\xd1\xae\x4e\xf7\x65\x7f\x1f\xce" + "\xc2\x39\xd1\x84\xd4\xe5\x02\xe0" + "\x58\xaa\xf1\x5e\x81\xaf\x7f\x72" + "\x0f\x08\x99\x43\xb9\xd8\xac\x41" + "\x35\x55\xf2\xb2\xd4\x98\xb8\x3b" + "\x2b\x3c\x3e\x16\x06\x31\xfc\x79" + "\x47\x38\x63\x51\xc5\xd0\x26\xd7" + "\x43\xb4\x2b\xd9\xc5\x05\xf2\x9d" + "\x18\xc9\x26\x82\x56\xd2\x11\x05" + "\xb6\x89\xb4\x43\x9c\xb5\x9d\x11" + "\x6c\x83\x37\x71\x27\x1c\xae\xbf" + "\xcd\x57\xd2\xee\x0d\x5a\x15\x26" + "\x67\x88\x80\x80\x1b\xdc\xc1\x62" + "\xdd\x4c\xff\x92\x5c\x6c\xe1\xa0" + "\xe3\x79\xa9\x65\x8c\x8c\x14\x42" + "\xe5\x11\xd2\x1a\xad\xa9\x56\x6f" + "\x98\xfc\x8a\x7b\x56\x1f\xc6\xc1" + "\x52\x12\x92\x9b\x41\x0f\x4b\xae" + "\x1b\x4a\xbc\xfe\x23\xb6\x94\x70" + "\x04\x30\x9e\x69\x47\xbe\xb8\x8f" + "\xca\x45\xd7\x8a\xf4\x78\x3e\xaa" + "\x71\x17\xd8\x1e\xb8\x11\x8f\xbc" + "\xc8\x1a\x65\x7b\x41\x89\x72\xc7" + "\x5f\xbe\xc5\x2a\xdb\x5c\x54\xf9" + "\x25\xa3\x7a\x80\x56\x9c\x8c\xab" + "\x26\x19\x10\x36\xa6\xf3\x14\x79" + "\x40\x98\x70\x68\xb7\x35\xd9\xb9" + "\x27\xd4\xe7\x74\x5b\x3d\x97\xb4" + "\xd9\xaa\xd9\xf2\xb5\x14\x84\x1f" + "\xa9\xde\x12\x44\x5b\x00\xc0\xbc" + "\xc8\x11\x25\x1b\x67\x7a\x15\x72" + "\xa6\x31\x6f\xf4\x68\x7a\x86\x9d" + "\x43\x1c\x5f\x16\xd3\xad\x2e\x52" + "\xf3\xb4\xc3\xfa\x27\x2e\x68\x6c" + "\x06\xe7\x4c\x4f\xa2\xe0\xe4\x21" + "\x5d\x9e\x33\x58\x8d\xbf\xd5\x70" + "\xf8\x80\xa5\xdd\xe7\x18\x79\xfa" + "\x7b\xfd\x09\x69\x2c\x37\x32\xa8" + "\x65\xfa\x8d\x8b\x5c\xcc\xe8\xf3" + "\x37\xf6\xa6\xc6\x5c\xa2\x66\x79" + 
"\xfa\x8a\xa7\xd1\x0b\x2e\x1b\x5e" + "\x95\x35\x00\x76\xae\x42\xf7\x50" + "\x51\x78\xfb\xb4\x28\x24\xde\x1a" + "\x70\x8b\xed\xca\x3c\x5e\xe4\xbd" + "\x28\xb5\xf3\x76\x4f\x67\x5d\x81" + "\xb2\x60\x87\xd9\x7b\x19\x1a\xa7" + "\x79\xa2\xfa\x3f\x9e\xa9\xd7\x25" + "\x61\xe1\x74\x31\xa2\x77\xa0\x1b" + "\xf6\xf7\xcb\xc5\xaa\x9e\xce\xf9" + "\x9b\x96\xef\x51\xc3\x1a\x44\x96" + "\xae\x17\x50\xab\x29\x08\xda\xcc" + "\x1a\xb3\x12\xd0\x24\xe4\xe2\xe0" + "\xc6\xe3\xcc\x82\xd0\xba\x47\x4c" + "\x3f\x49\xd7\xe8\xb6\x61\xaa\x65" + "\x25\x18\x40\x2d\x62\x25\x02\x71" + "\x61\xa2\xc1\xb2\x13\xd2\x71\x3f" + "\x43\x1a\xc9\x09\x92\xff\xd5\x57" + "\xf0\xfc\x5e\x1c\xf1\xf5\xf9\xf3" + "\x5b", + .len = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, + }, { + .key = "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f", + .klen = 32, + .iv = "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x58" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x54\x68\x65\x20\x64\x68\x6f\x6c" + "\x65\x20\x28\x70\x72\x6f\x6e\x6f" + "\x75\x6e\x63\x65\x64\x20\x22\x64" + "\x6f\x6c\x65\x22\x29\x20\x69\x73" + "\x20\x61\x6c\x73\x6f\x20\x6b\x6e" + "\x6f\x77\x6e\x20\x61\x73\x20\x74" + "\x68\x65\x20\x41\x73\x69\x61\x74" + "\x69\x63\x20\x77\x69\x6c\x64\x20" + "\x64\x6f\x67\x2c\x20\x72\x65\x64" + "\x20\x64\x6f\x67\x2c\x20\x61\x6e" + "\x64\x20\x77\x68\x69\x73\x74\x6c" + "\x69\x6e\x67\x20\x64\x6f\x67\x2e" + "\x20\x49\x74\x20\x69\x73\x20\x61" + "\x62\x6f\x75\x74\x20\x74\x68\x65" + "\x20\x73\x69\x7a\x65\x20\x6f\x66" + "\x20\x61\x20\x47\x65\x72\x6d\x61" + "\x6e\x20\x73\x68\x65\x70\x68\x65" + "\x72\x64\x20\x62\x75\x74\x20\x6c" + "\x6f\x6f\x6b\x73\x20\x6d\x6f\x72" + "\x65\x20\x6c\x69\x6b\x65\x20\x61" + "\x20\x6c\x6f\x6e\x67\x2d\x6c\x65" + "\x67\x67\x65\x64\x20\x66\x6f\x78" + "\x2e\x20\x54\x68\x69\x73\x20\x68" + "\x69\x67\x68\x6c\x79\x20\x65\x6c" + 
"\x75\x73\x69\x76\x65\x20\x61\x6e" + "\x64\x20\x73\x6b\x69\x6c\x6c\x65" + "\x64\x20\x6a\x75\x6d\x70\x65\x72" + "\x20\x69\x73\x20\x63\x6c\x61\x73" + "\x73\x69\x66\x69\x65\x64\x20\x77" + "\x69\x74\x68\x20\x77\x6f\x6c\x76" + "\x65\x73\x2c\x20\x63\x6f\x79\x6f" + "\x74\x65\x73\x2c\x20\x6a\x61\x63" + "\x6b\x61\x6c\x73\x2c\x20\x61\x6e" + "\x64\x20\x66\x6f\x78\x65\x73\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x74" + "\x61\x78\x6f\x6e\x6f\x6d\x69\x63" + "\x20\x66\x61\x6d\x69\x6c\x79\x20" + "\x43\x61\x6e\x69\x64\x61\x65\x2e", + .ctext = "\x9f\x1a\xab\x8a\x95\xf4\x7e\xcd" + "\xee\x34\xc0\x39\xd6\x23\x43\x94" + "\xf6\x01\xc1\x7f\x60\x91\xa5\x23" + "\x4a\x8a\xe6\xb1\x14\x8b\xd7\x58" + "\xee\x02\xad\xab\xce\x1e\x7d\xdf" + "\xf9\x49\x27\x69\xd0\x8d\x0c\x20" + "\x6e\x17\xc4\xae\x87\x7a\xc6\x61" + "\x91\xe2\x8e\x0a\x1d\x61\xcc\x38" + "\x02\x64\x43\x49\xc6\xb2\x59\x59" + "\x42\xe7\x9d\x83\x00\x60\x90\xd2" + "\xb9\xcd\x97\x6e\xc7\x95\x71\xbc" + "\x23\x31\x58\x07\xb3\xb4\xac\x0b" + "\x87\x64\x56\xe5\xe3\xec\x63\xa1" + "\x71\x8c\x08\x48\x33\x20\x29\x81" + "\xea\x01\x25\x20\xc3\xda\xe6\xee" + "\x6a\x03\xf6\x68\x4d\x26\xa0\x91" + "\x9e\x44\xb8\xc1\xc0\x8f\x5a\x6a" + "\xc0\xcd\xbf\x24\x5e\x40\x66\xd2" + "\x42\x24\xb5\xbf\xc1\xeb\x12\x60" + "\x56\xbe\xb1\xa6\xc4\x0f\xfc\x49" + "\x69\x9f\xcc\x06\x5c\xe3\x26\xd7" + "\x52\xc0\x42\xe8\xb4\x76\xc3\xee" + "\xb2\x97\xe3\x37\x61\x29\x5a\xb5" + "\x8e\xe8\x8c\xc5\x38\xcc\xcb\xec" + "\x64\x1a\xa9\x12\x5f\xf7\x79\xdf" + "\x64\xca\x77\x4e\xbd\xf9\x83\xa0" + "\x13\x27\x3f\x31\x03\x63\x30\x26" + "\x27\x0b\x3e\xb3\x23\x13\x61\x0b" + "\x70\x1d\xd4\xad\x85\x1e\xbf\xdf" + "\xc6\x8e\x4d\x08\xcc\x7e\x77\xbd" + "\x1e\x18\x77\x38\x3a\xfe\xc0\x5d" + "\x16\xfc\xf0\xa9\x2f\xe9\x17\xc7" + "\xd3\x23\x17\x18\xa3\xe6\x54\x77" + "\x6f\x1b\xbe\x8a\x6e\x7e\xca\x97" + "\x08\x05\x36\x76\xaf\x12\x7a\x42" + "\xf7\x7a\xc2\x35\xc3\xb4\x93\x40" + "\x54\x14\x90\xa0\x4d\x65\x1c\x37" + "\x50\x70\x44\x29\x6d\x6e\x62\x68", + .len = 304, + } +}; + +/* Adiantum test vectors from 
https://github.com/google/adiantum */ +static const struct cipher_testvec adiantum_xchacha12_aes_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .ptext = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .ctext = "\x6d\x32\x86\x18\x67\x86\x0f\x3f" + "\x96\x7c\x9d\x28\x0d\x53\xec\x9f", + .len = 16, + .also_non_np = 1, + .np = 2, + .tap = { 14, 2 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .ptext = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .ctext = "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a" + "\x39\xbe\x78\xbe\x8d\x28\xc8\x89" + "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e" + "\xc9\x18\x7b\xbe\x18\x60\x50", + .len = 31, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .ptext = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + 
"\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .ctext = "\x9e\x16\xab\xed\x4b\xa7\x42\x5a" + "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0" + "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e" + "\x21\x48\xa0\xb8\x65\x48\x27\x48" + "\x84\x54\x54\xb2\x9a\x94\x7b\xe6" + "\x4b\x29\xe9\xcf\x05\x91\x80\x1a" + "\x3a\xf3\x41\x96\x85\x1d\x9f\x74" + "\x51\x56\x63\xfa\x7c\x28\x85\x49" + "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33" + "\x80\xa3\x3c\xce\xb2\x57\x93\xf5" + "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93" + "\x66\xe0\x30\x77\x16\xe4\xa0\x31" + "\xba\x70\xbc\x68\x13\xf5\xb0\x9a" + "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48" + "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5" + "\x8d\xde\x34\x86\x78\x60\x75\x8d", + .len = 128, + .also_non_np = 1, + .np = 4, + .tap = { 104, 16, 4, 4 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .ptext = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + 
"\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .ctext = "\x15\x97\xd0\x86\x18\x03\x9c\x51" + "\xc5\x11\x36\x62\x13\x92\xe6\x73" + "\x29\x79\xde\xa1\x00\x3e\x08\x64" + "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c" + "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2" + "\x89\xe6\xbc\xdf\x0c\x33\x27\x42" + "\x46\x73\x2f\xba\x4e\xa6\x46\x8f" + 
"\xe4\xee\x39\x63\x42\x65\xa3\x88" + "\x7a\xad\x33\x23\xa9\xa7\x20\x7f" + "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4" + "\xd6\x07\x8a\x77\x26\xd1\xab\x44" + "\x99\x55\x03\x5e\xed\x8d\x7b\xbd" + "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5" + "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1" + "\x30\x0a\xd0\xa6\xa9\x28\x69\xae" + "\x2a\xe6\x54\xac\x82\x9d\x6a\x95" + "\x6f\x06\x44\xc5\x5a\x77\x6e\xec" + "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e" + "\x0e\x8a\x62\x00\x03\xc8\x84\xdd" + "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf" + "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11" + "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4" + "\xf8\x51\x80\x39\x14\x05\x12\xdb" + "\x87\x93\xe2\x26\x30\x9c\x3a\x21" + "\xe5\xd0\x38\x57\x80\x15\xe4\x08" + "\x58\x05\x49\x7d\xe6\x92\x77\x70" + "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68" + "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8" + "\x2c\x78\x78\x61\xcf\xe3\xde\x69" + "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03" + "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe" + "\x90\x62\xb2\x28\x99\x86\xf5\x44" + "\x99\xeb\x31\xcf\xca\xdf\xd0\x21" + "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7" + "\xab\xe1\x9b\x45\xba\x66\xda\xee" + "\xdd\x04\x12\x40\x98\xe1\x69\xe5" + "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63" + "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62" + "\x11\x34\x61\x94\x35\xfe\xf2\x99" + "\xfd\xee\x19\xea\x95\xb6\x12\xbf" + "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65" + "\x78\x74\x10\x50\x29\x63\x28\xea" + "\x6b\xab\xd4\x06\x4d\x15\x24\x31" + "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf" + "\x49\xdb\x68\x71\x31\x8f\x87\xe2" + "\x13\x05\x64\xd6\x22\x0c\xf8\x36" + "\x84\x24\x3e\x69\x5e\xb8\x9e\x16" + "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba" + "\xe5\x59\x21\x33\x1b\xa9\x26\xc2" + "\xc7\xd9\x30\x73\xb6\xa6\x73\x82" + "\x19\xfa\x44\x4d\x40\x8b\x69\x04" + "\x94\x74\xea\x6e\xb3\x09\x47\x01" + "\x2a\xb9\x78\x34\x43\x11\xed\xd6" + "\x8c\x95\x65\x1b\x85\x67\xa5\x40" + "\xac\x9c\x05\x4b\x57\x4a\xa9\x96" + "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7" + "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e" + "\xa6\x65\xd7\x55\x81\xb7\xed\x11" + "\x9b\x40\x75\xa8\x6b\x56\xaf\x16" + "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d" + "\x85\xc2\xc0\xde\x43\x39\x4a\x96" + 
"\xba\x88\x97\xc0\xd6\x00\x0e\x27" + "\x21\xb0\x21\x52\xba\xa7\x37\xaa" + "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6", + .len = 512, + .also_non_np = 1, + .np = 2, + .tap = { 144, 368 }, + } +}; + +/* Adiantum with XChaCha20 instead of XChaCha12 */ +/* Test vectors from https://github.com/google/adiantum */ +static const struct cipher_testvec adiantum_xchacha20_aes_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .ptext = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .ctext = "\xf6\x78\x97\xd6\xaa\x94\x01\x27" + "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf", + .len = 16, + .also_non_np = 1, + .np = 3, + .tap = { 5, 2, 9 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .ptext = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .ctext = "\x4b\xb8\x90\x10\xdf\x7f\x64\x08" + "\x0e\x14\x42\x5f\x00\x74\x09\x36" + "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28" + "\x0c\x04\x91\x14\x91\xe9\x37", + .len = 31, + .also_non_np = 1, + .np = 2, + .tap = { 16, 15 }, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .ptext = 
"\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .ctext = "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59" + "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4" + "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67" + "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c" + "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c" + "\x13\xc8\x49\x44\xcc\x0a\x90\x9d" + "\x7c\xdd\x19\x3f\xea\x72\x8d\x58" + "\xab\xe7\x09\x2c\xec\xb5\x44\xd2" + "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15" + "\xec\x2a\xa6\x69\x91\xf9\xf3\x13" + "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94" + "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e" + "\x94\xc1\x91\x14\xa1\x14\xcb\xbe" + "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32" + "\x29\x62\x0d\xb2\xf6\x3c\x58\x57" + "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5", + .len = 128, + .also_non_np = 1, + .np = 4, + .tap = { 112, 7, 8, 1 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .ptext = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + 
"\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + 
.ctext = "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b" + "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2" + "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44" + "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38" + "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8" + "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb" + "\x8b\x40\x88\x7e\x69\x73\xf7\x16" + "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6" + "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7" + "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0" + "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2" + "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9" + "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06" + "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6" + "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d" + "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b" + "\xe0\x7a\xdd\xec\x32\x73\x42\x32" + "\x7f\x35\x67\x60\x0d\xcf\x10\x52" + "\x61\x22\x53\x8d\x8e\xbb\x33\x76" + "\x59\xd9\x10\xce\xdf\xef\xc0\x41" + "\xd5\x33\x29\x6a\xda\x46\xa4\x51" + "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb" + "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5" + "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70" + "\x26\x39\x95\x07\xad\x7a\xc9\x69" + "\xfe\x81\xc7\x88\x08\x38\xaf\xad" + "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8" + "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6" + "\x78\x62\xec\xa3\x59\xd9\xc6\x9d" + "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c" + "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc" + "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37" + "\x06\x9b\x30\x32\x67\xf7\xe7\xd2" + "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12" + "\x94\xa1\x34\x85\x93\x50\x4b\x0a" + "\x3c\x7d\x49\x25\x01\x41\x6b\x96" + "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93" + "\x1f\x70\x38\xb8\x21\xee\xf6\xa7" + "\xee\xeb\xe7\x81\xa4\x13\xb4\x87" + "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2" + "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8" + "\x34\x42\xe5\xae\x45\x13\x63\xfe" + "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c" + "\x4c\xaf\xf0\x09\x62\x26\x66\x1e" + "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7" + "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47" + "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2" + "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c" + "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a" + "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b" + "\x65\xa8\xac\xea\x8d\x68\x46\x34" + "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a" + "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57" + 
"\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad" + "\x5b\x19\x50\x2f\x3a\xcc\x06\x46" + "\x04\x51\x3f\x91\x97\xf0\xd2\x07" + "\xe7\x93\x89\x7e\xb5\x32\x0f\x03" + "\xe5\x58\x9e\x74\x72\xeb\xc2\x38" + "\x00\x0c\x91\x72\x69\xed\x7d\x6d" + "\xc8\x71\xf0\xec\xff\x80\xd9\x1c" + "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc" + "\xb1\xa6\xbd\xbd\x70\x40\xca\x20" + "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c" + "\xc7\x27\xe1\x6a\x29\xad\xa4\x03", + .len = 512, + } +}; + /* * CTS (Cipher Text Stealing) mode tests */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 61c392752fe4..ccfcf00f2798 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3623,7 +3623,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in * change. */ - peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0); if (IS_ERR(peer_integrity_tfm)) { peer_integrity_tfm = NULL; drbd_err(connection, "peer data-integrity-alg %s not supported\n", diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index 6767d965c36c..256b0b1d0f26 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -1,10 +1,7 @@ -/** +// SPDX-License-Identifier: GPL-2.0 +/* * Copyright (c) 2010-2012 Broadcom. All rights reserved. * Copyright (c) 2013 Lubomir Rintel - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License ("GPL") - * version 2, as published by the Free Software Foundation. 
*/ #include diff --git a/drivers/char/random.c b/drivers/char/random.c index 2eb70e76ed35..38c6d1af6d1c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -265,7 +265,7 @@ #include #include #include -#include +#include #include #include @@ -431,11 +431,10 @@ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]); +#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) +static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used); + __u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -863,7 +862,7 @@ static int crng_fast_load(const char *cp, size_t len) } p = (unsigned char *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; + p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; } spin_unlock_irqrestore(&primary_crng.lock, flags); @@ -895,7 +894,7 @@ static int crng_slow_load(const char *cp, size_t len) unsigned long flags; static unsigned char lfsr = 1; unsigned char tmp; - unsigned i, max = CHACHA20_KEY_SIZE; + unsigned i, max = CHACHA_KEY_SIZE; const char * src_buf = cp; char * dest_buf = (char *) &primary_crng.state[4]; @@ -913,8 +912,8 @@ static int crng_slow_load(const char *cp, size_t len) lfsr >>= 1; if (tmp & 1) lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA20_KEY_SIZE]; - dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + tmp = dest_buf[i % CHACHA_KEY_SIZE]; + dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; lfsr += (tmp << 3) | (tmp >> 5); } spin_unlock_irqrestore(&primary_crng.lock, flags); @@ 
-926,7 +925,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u8 block[CHACHA20_BLOCK_SIZE]; + __u8 block[CHACHA_BLOCK_SIZE]; __u32 key[8]; } buf; @@ -937,7 +936,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA20_KEY_SIZE); + CHACHA_KEY_SIZE); } spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { @@ -973,7 +972,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]) + __u8 out[CHACHA_BLOCK_SIZE]) { unsigned long v, flags; @@ -990,7 +989,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) { struct crng_state *crng = NULL; @@ -1008,14 +1007,14 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) * enough) to mutate the CRNG key to provide backtracking protection. 
*/ static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used) + __u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; __u32 *s, *d; int i; used = round_up(used, sizeof(__u32)); - if (used + CHACHA20_KEY_SIZE > CHACHA20_BLOCK_SIZE) { + if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { extract_crng(tmp); used = 0; } @@ -1027,7 +1026,7 @@ static void _crng_backtrack_protect(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) +static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) { struct crng_state *crng = NULL; @@ -1042,8 +1041,8 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE; - __u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4); + ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; + __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); int large_request = (nbytes > 256); while (nbytes) { @@ -1057,7 +1056,7 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) } extract_crng(tmp); - i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE); + i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); if (copy_to_user(buf, tmp, i)) { ret = -EFAULT; break; @@ -1622,14 +1621,14 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4); + __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); trace_get_random_bytes(nbytes, _RET_IP_); - while (nbytes >= CHACHA20_BLOCK_SIZE) { + while (nbytes >= CHACHA_BLOCK_SIZE) { extract_crng(buf); - buf += CHACHA20_BLOCK_SIZE; - nbytes -= CHACHA20_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + nbytes -= CHACHA_BLOCK_SIZE; } if (nbytes > 0) { @@ -1637,7 +1636,7 @@ static void _get_random_bytes(void *buf, int nbytes) memcpy(buf, tmp, nbytes); crng_backtrack_protect(tmp, 
nbytes); } else - crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE); + crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); memzero_explicit(tmp, sizeof(tmp)); } @@ -2208,8 +2207,8 @@ struct ctl_table random_table[] = { struct batched_entropy { union { - u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)]; + u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; + u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; }; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index caa98a7fe392..d80751d48cf1 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -762,10 +762,12 @@ config CRYPTO_DEV_CCREE select CRYPTO_ECB select CRYPTO_CTR select CRYPTO_XTS + select CRYPTO_SM4 + select CRYPTO_SM3 help Say 'Y' to enable a driver for the REE interface of the Arm TrustZone CryptoCell family of processors. Currently the - CryptoCell 712, 710 and 630 are supported. + CryptoCell 713, 703, 712, 710 and 630 are supported. Choose this if you wish to use hardware acceleration of cryptographic operations on the system REE. If unsure say Y. 
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c index f5c07498ea4f..4092c2aad8e2 100644 --- a/drivers/crypto/amcc/crypto4xx_alg.c +++ b/drivers/crypto/amcc/crypto4xx_alg.c @@ -520,8 +520,7 @@ static int crypto4xx_compute_gcm_hash_key_sw(__le32 *hash_start, const u8 *key, uint8_t src[16] = { 0 }; int rc = 0; - aes_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + aes_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(aes_tfm)) { rc = PTR_ERR(aes_tfm); pr_warn("could not load aes cipher driver: %d\n", rc); diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 2d1f1db9f807..c9393ffb70ed 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -3868,7 +3868,6 @@ static struct iproc_alg_s driver_algs[] = { .cra_driver_name = "ctr-aes-iproc", .cra_blocksize = AES_BLOCK_SIZE, .cra_ablkcipher = { - /* .geniv = "chainiv", */ .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, @@ -4605,7 +4604,6 @@ static int spu_register_ablkcipher(struct iproc_alg_s *driver_alg) crypto->cra_priority = cipher_pri; crypto->cra_alignmask = 0; crypto->cra_ctxsize = sizeof(struct iproc_ctx_s); - INIT_LIST_HEAD(&crypto->cra_list); crypto->cra_init = ablkcipher_cra_init; crypto->cra_exit = generic_cra_exit; @@ -4652,12 +4650,16 @@ static int spu_register_ahash(struct iproc_alg_s *driver_alg) hash->halg.statesize = sizeof(struct spu_hash_export_s); if (driver_alg->auth_info.mode != HASH_MODE_HMAC) { - hash->setkey = ahash_setkey; hash->init = ahash_init; hash->update = ahash_update; hash->final = ahash_final; hash->finup = ahash_finup; hash->digest = ahash_digest; + if ((driver_alg->auth_info.alg == HASH_ALG_AES) && + ((driver_alg->auth_info.mode == HASH_MODE_XCBC) || + (driver_alg->auth_info.mode == HASH_MODE_CMAC))) { + hash->setkey = ahash_setkey; + } } else { hash->setkey = ahash_hmac_setkey; hash->init = 
ahash_hmac_init; @@ -4687,7 +4689,6 @@ static int spu_register_aead(struct iproc_alg_s *driver_alg) aead->base.cra_priority = aead_pri; aead->base.cra_alignmask = 0; aead->base.cra_ctxsize = sizeof(struct iproc_ctx_s); - INIT_LIST_HEAD(&aead->base.cra_list); aead->base.cra_flags |= CRYPTO_ALG_ASYNC; /* setkey set in alg initialization */ diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 869f092432de..92e593e2069a 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -72,6 +72,8 @@ #define AUTHENC_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \ CAAM_CMD_SZ * 5) +#define CHACHAPOLY_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + CAAM_CMD_SZ * 6) + #define DESC_MAX_USED_BYTES (CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) #define DESC_MAX_USED_LEN (DESC_MAX_USED_BYTES / CAAM_CMD_SZ) @@ -513,6 +515,61 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, return 0; } +static int chachapoly_set_sh_desc(struct crypto_aead *aead) +{ + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + unsigned int ivsize = crypto_aead_ivsize(aead); + u32 *desc; + + if (!ctx->cdata.keylen || !ctx->authsize) + return 0; + + desc = ctx->sh_desc_enc; + cnstr_shdsc_chachapoly(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, true, false); + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), ctx->dir); + + desc = ctx->sh_desc_dec; + cnstr_shdsc_chachapoly(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, false, false); + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), ctx->dir); + + return 0; +} + +static int chachapoly_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct caam_ctx *ctx = crypto_aead_ctx(aead); + + if (authsize != POLY1305_DIGEST_SIZE) + return -EINVAL; + + ctx->authsize = authsize; + return chachapoly_set_sh_desc(aead); +} + +static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int 
keylen) +{ + struct caam_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize; + + if (keylen != CHACHA_KEY_SIZE + saltlen) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->cdata.key_virt = key; + ctx->cdata.keylen = keylen - saltlen; + + return chachapoly_set_sh_desc(aead); +} + static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { @@ -1031,6 +1088,40 @@ static void init_gcm_job(struct aead_request *req, /* End of blank commands */ } +static void init_chachapoly_job(struct aead_request *req, + struct aead_edesc *edesc, bool all_contig, + bool encrypt) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int ivsize = crypto_aead_ivsize(aead); + unsigned int assoclen = req->assoclen; + u32 *desc = edesc->hw_desc; + u32 ctx_iv_off = 4; + + init_aead_job(req, edesc, all_contig, encrypt); + + if (ivsize != CHACHAPOLY_IV_SIZE) { + /* IPsec specific: CONTEXT1[223:128] = {NONCE, IV} */ + ctx_iv_off += 4; + + /* + * The associated data comes already with the IV but we need + * to skip it when we authenticate or encrypt... + */ + assoclen -= ivsize; + } + + append_math_add_imm_u32(desc, REG3, ZERO, IMM, assoclen); + + /* + * For IPsec load the IV further in the same register. 
+ * For RFC7539 simply load the 12 bytes nonce in a single operation + */ + append_load_as_imm(desc, req->iv, ivsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ctx_iv_off << LDST_OFFSET_SHIFT); +} + static void init_authenc_job(struct aead_request *req, struct aead_edesc *edesc, bool all_contig, bool encrypt) @@ -1289,6 +1380,72 @@ static int gcm_encrypt(struct aead_request *req) return ret; } +static int chachapoly_encrypt(struct aead_request *req) +{ + struct aead_edesc *edesc; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + bool all_contig; + u32 *desc; + int ret; + + edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, + true); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + desc = edesc->hw_desc; + + init_chachapoly_job(req, edesc, all_contig, true); + print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), + 1); + + ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); + if (!ret) { + ret = -EINPROGRESS; + } else { + aead_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + +static int chachapoly_decrypt(struct aead_request *req) +{ + struct aead_edesc *edesc; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + bool all_contig; + u32 *desc; + int ret; + + edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, + false); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + desc = edesc->hw_desc; + + init_chachapoly_job(req, edesc, all_contig, false); + print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), + 1); + + ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); + if (!ret) { + ret = -EINPROGRESS; + } else { + aead_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; 
+} + static int ipsec_gcm_encrypt(struct aead_request *req) { if (req->assoclen < 8) @@ -3002,6 +3159,50 @@ static struct caam_aead_alg driver_aeads[] = { .geniv = true, }, }, + { + .aead = { + .base = { + .cra_name = "rfc7539(chacha20,poly1305)", + .cra_driver_name = "rfc7539-chacha20-poly1305-" + "caam", + .cra_blocksize = 1, + }, + .setkey = chachapoly_setkey, + .setauthsize = chachapoly_setauthsize, + .encrypt = chachapoly_encrypt, + .decrypt = chachapoly_decrypt, + .ivsize = CHACHAPOLY_IV_SIZE, + .maxauthsize = POLY1305_DIGEST_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_CHACHA20 | + OP_ALG_AAI_AEAD, + .class2_alg_type = OP_ALG_ALGSEL_POLY1305 | + OP_ALG_AAI_AEAD, + }, + }, + { + .aead = { + .base = { + .cra_name = "rfc7539esp(chacha20,poly1305)", + .cra_driver_name = "rfc7539esp-chacha20-" + "poly1305-caam", + .cra_blocksize = 1, + }, + .setkey = chachapoly_setkey, + .setauthsize = chachapoly_setauthsize, + .encrypt = chachapoly_encrypt, + .decrypt = chachapoly_decrypt, + .ivsize = 8, + .maxauthsize = POLY1305_DIGEST_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_CHACHA20 | + OP_ALG_AAI_AEAD, + .class2_alg_type = OP_ALG_ALGSEL_POLY1305 | + OP_ALG_AAI_AEAD, + }, + }, }; static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, @@ -3135,7 +3336,7 @@ static int __init caam_algapi_init(void) struct device *ctrldev; struct caam_drv_private *priv; int i = 0, err = 0; - u32 cha_vid, cha_inst, des_inst, aes_inst, md_inst; + u32 aes_vid, aes_inst, des_inst, md_vid, md_inst, ccha_inst, ptha_inst; unsigned int md_limit = SHA512_DIGEST_SIZE; bool registered = false; @@ -3168,14 +3369,38 @@ static int __init caam_algapi_init(void) * Register crypto algorithms the device supports. * First, detect presence and attributes of DES, AES, and MD blocks. 
*/ - cha_vid = rd_reg32(&priv->ctrl->perfmon.cha_id_ls); - cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); - des_inst = (cha_inst & CHA_ID_LS_DES_MASK) >> CHA_ID_LS_DES_SHIFT; - aes_inst = (cha_inst & CHA_ID_LS_AES_MASK) >> CHA_ID_LS_AES_SHIFT; - md_inst = (cha_inst & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + if (priv->era < 10) { + u32 cha_vid, cha_inst; + + cha_vid = rd_reg32(&priv->ctrl->perfmon.cha_id_ls); + aes_vid = cha_vid & CHA_ID_LS_AES_MASK; + md_vid = (cha_vid & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + des_inst = (cha_inst & CHA_ID_LS_DES_MASK) >> + CHA_ID_LS_DES_SHIFT; + aes_inst = cha_inst & CHA_ID_LS_AES_MASK; + md_inst = (cha_inst & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + ccha_inst = 0; + ptha_inst = 0; + } else { + u32 aesa, mdha; + + aesa = rd_reg32(&priv->ctrl->vreg.aesa); + mdha = rd_reg32(&priv->ctrl->vreg.mdha); + + aes_vid = (aesa & CHA_VER_VID_MASK) >> CHA_VER_VID_SHIFT; + md_vid = (mdha & CHA_VER_VID_MASK) >> CHA_VER_VID_SHIFT; + + des_inst = rd_reg32(&priv->ctrl->vreg.desa) & CHA_VER_NUM_MASK; + aes_inst = aesa & CHA_VER_NUM_MASK; + md_inst = mdha & CHA_VER_NUM_MASK; + ccha_inst = rd_reg32(&priv->ctrl->vreg.ccha) & CHA_VER_NUM_MASK; + ptha_inst = rd_reg32(&priv->ctrl->vreg.ptha) & CHA_VER_NUM_MASK; + } /* If MD is present, limit digest size based on LP256 */ - if (md_inst && ((cha_vid & CHA_ID_LS_MD_MASK) == CHA_ID_LS_MD_LP256)) + if (md_inst && md_vid == CHA_VER_VID_MD_LP256) md_limit = SHA256_DIGEST_SIZE; for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { @@ -3196,10 +3421,10 @@ static int __init caam_algapi_init(void) * Check support for AES modes not available * on LP devices. 
*/ - if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP) - if ((t_alg->caam.class1_alg_type & OP_ALG_AAI_MASK) == - OP_ALG_AAI_XTS) - continue; + if (aes_vid == CHA_VER_VID_AES_LP && + (t_alg->caam.class1_alg_type & OP_ALG_AAI_MASK) == + OP_ALG_AAI_XTS) + continue; caam_skcipher_alg_init(t_alg); @@ -3232,21 +3457,28 @@ static int __init caam_algapi_init(void) if (!aes_inst && (c1_alg_sel == OP_ALG_ALGSEL_AES)) continue; + /* Skip CHACHA20 algorithms if not supported by device */ + if (c1_alg_sel == OP_ALG_ALGSEL_CHACHA20 && !ccha_inst) + continue; + + /* Skip POLY1305 algorithms if not supported by device */ + if (c2_alg_sel == OP_ALG_ALGSEL_POLY1305 && !ptha_inst) + continue; + /* * Check support for AES algorithms not available * on LP devices. */ - if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP) - if (alg_aai == OP_ALG_AAI_GCM) - continue; + if (aes_vid == CHA_VER_VID_AES_LP && alg_aai == OP_ALG_AAI_GCM) + continue; /* * Skip algorithms requiring message digests * if MD or MD size is not supported by device. */ - if (c2_alg_sel && - (!md_inst || (t_alg->aead.maxauthsize > md_limit))) - continue; + if ((c2_alg_sel & ~OP_ALG_ALGSEL_SUBMASK) == 0x40 && + (!md_inst || t_alg->aead.maxauthsize > md_limit)) + continue; caam_aead_alg_init(t_alg); diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index 1a6f0da14106..7db1640d3577 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1213,6 +1213,139 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata, } EXPORT_SYMBOL(cnstr_shdsc_rfc4543_decap); +/** + * cnstr_shdsc_chachapoly - Chacha20 + Poly1305 generic AEAD (rfc7539) and + * IPsec ESP (rfc7634, a.k.a. rfc7539esp) shared + * descriptor (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_CHACHA20 ANDed with + * OP_ALG_AAI_AEAD. 
+ * @adata: pointer to authentication transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_POLY1305 ANDed with + * OP_ALG_AAI_AEAD. + * @ivsize: initialization vector size + * @icvsize: integrity check value (ICV) size (truncated or full) + * @encap: true if encapsulation, false if decapsulation + * @is_qi: true when called from caam/qi + */ +void cnstr_shdsc_chachapoly(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool encap, + const bool is_qi) +{ + u32 *key_jump_cmd, *wait_cmd; + u32 nfifo; + const bool is_ipsec = (ivsize != CHACHAPOLY_IV_SIZE); + + /* Note: Context registers are saved. */ + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + + /* skip key loading if they are loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, cdata->keylen, + CLASS_1 | KEY_DEST_CLASS_REG); + + /* For IPsec load the salt from keymat in the context register */ + if (is_ipsec) + append_load_as_imm(desc, cdata->key_virt + cdata->keylen, 4, + LDST_CLASS_1_CCB | LDST_SRCDST_BYTE_CONTEXT | + 4 << LDST_OFFSET_SHIFT); + + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 2 and 1 operations: Poly & ChaCha */ + if (encap) { + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + } else { + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + } + + if (is_qi) { + u32 *wait_load_cmd; + u32 ctx1_iv_off = is_ipsec ? 
8 : 4; + + /* REG3 = assoclen */ + append_seq_load(desc, 4, LDST_CLASS_DECO | + LDST_SRCDST_WORD_DECO_MATH3 | + 4 << LDST_OFFSET_SHIFT); + + wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_CALM | JUMP_COND_NCP | + JUMP_COND_NOP | JUMP_COND_NIP | + JUMP_COND_NIFP); + set_jump_tgt_here(desc, wait_load_cmd); + + append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ctx1_iv_off << LDST_OFFSET_SHIFT); + } + + /* + * MAGIC with NFIFO + * Read associated data from the input and send them to class1 and + * class2 alignment blocks. From class1 send data to output fifo and + * then write it to memory since we don't need to encrypt AD. + */ + nfifo = NFIFOENTRY_DEST_BOTH | NFIFOENTRY_FC1 | NFIFOENTRY_FC2 | + NFIFOENTRY_DTYPE_POLY | NFIFOENTRY_BND; + append_load_imm_u32(desc, nfifo, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO_SM | LDLEN_MATH3); + + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + append_seq_fifo_load(desc, 0, FIFOLD_TYPE_NOINFOFIFO | + FIFOLD_CLASS_CLASS1 | LDST_VLF); + append_move_len(desc, MOVE_AUX_LS | MOVE_SRC_AUX_ABLK | + MOVE_DEST_OUTFIFO | MOVELEN_MRSEL_MATH3); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | LDST_VLF); + + /* IPsec - copy IV at the output */ + if (is_ipsec) + append_seq_fifo_store(desc, ivsize, FIFOST_TYPE_METADATA | + 0x2 << 25); + + wait_cmd = append_jump(desc, JUMP_JSL | JUMP_TYPE_LOCAL | + JUMP_COND_NOP | JUMP_TEST_ALL); + set_jump_tgt_here(desc, wait_cmd); + + if (encap) { + /* Read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, + CAAM_CMD_SZ); + aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + } else { + /* Read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, 
SEQOUTLEN, REG0, + CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, + CAAM_CMD_SZ); + aead_append_src_dst(desc, FIFOLD_TYPE_MSG); + + /* Load ICV for verification */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + } + + print_hex_dump_debug("chachapoly shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), + 1); +} +EXPORT_SYMBOL(cnstr_shdsc_chachapoly); + /* For skcipher encrypt and decrypt, read from req->src and write to req->dst */ static inline void skcipher_append_src_dst(u32 *desc) { @@ -1228,7 +1361,8 @@ static inline void skcipher_append_src_dst(u32 *desc) * @desc: pointer to buffer used for descriptor construction * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed - * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128 + * - OP_ALG_ALGSEL_CHACHA20 * @ivsize: initialization vector size * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template * @ctx1_iv_off: IV offset in CONTEXT1 register @@ -1293,7 +1427,8 @@ EXPORT_SYMBOL(cnstr_shdsc_skcipher_encap); * @desc: pointer to buffer used for descriptor construction * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed - * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. 
+ * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128 + * - OP_ALG_ALGSEL_CHACHA20 * @ivsize: initialization vector size * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template * @ctx1_iv_off: IV offset in CONTEXT1 register diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h index 1315c8f6f951..d5ca42ff961a 100644 --- a/drivers/crypto/caam/caamalg_desc.h +++ b/drivers/crypto/caam/caamalg_desc.h @@ -96,6 +96,11 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata, unsigned int ivsize, unsigned int icvsize, const bool is_qi); +void cnstr_shdsc_chachapoly(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool encap, + const bool is_qi); + void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, unsigned int ivsize, const bool is_rfc3686, const u32 ctx1_iv_off); diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 23c9fc4975f8..c0d55310aade 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -2462,7 +2462,7 @@ static int __init caam_qi_algapi_init(void) struct device *ctrldev; struct caam_drv_private *priv; int i = 0, err = 0; - u32 cha_vid, cha_inst, des_inst, aes_inst, md_inst; + u32 aes_vid, aes_inst, des_inst, md_vid, md_inst; unsigned int md_limit = SHA512_DIGEST_SIZE; bool registered = false; @@ -2497,14 +2497,34 @@ static int __init caam_qi_algapi_init(void) * Register crypto algorithms the device supports. * First, detect presence and attributes of DES, AES, and MD blocks. 
*/ - cha_vid = rd_reg32(&priv->ctrl->perfmon.cha_id_ls); - cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); - des_inst = (cha_inst & CHA_ID_LS_DES_MASK) >> CHA_ID_LS_DES_SHIFT; - aes_inst = (cha_inst & CHA_ID_LS_AES_MASK) >> CHA_ID_LS_AES_SHIFT; - md_inst = (cha_inst & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + if (priv->era < 10) { + u32 cha_vid, cha_inst; + + cha_vid = rd_reg32(&priv->ctrl->perfmon.cha_id_ls); + aes_vid = cha_vid & CHA_ID_LS_AES_MASK; + md_vid = (cha_vid & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + des_inst = (cha_inst & CHA_ID_LS_DES_MASK) >> + CHA_ID_LS_DES_SHIFT; + aes_inst = cha_inst & CHA_ID_LS_AES_MASK; + md_inst = (cha_inst & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + } else { + u32 aesa, mdha; + + aesa = rd_reg32(&priv->ctrl->vreg.aesa); + mdha = rd_reg32(&priv->ctrl->vreg.mdha); + + aes_vid = (aesa & CHA_VER_VID_MASK) >> CHA_VER_VID_SHIFT; + md_vid = (mdha & CHA_VER_VID_MASK) >> CHA_VER_VID_SHIFT; + + des_inst = rd_reg32(&priv->ctrl->vreg.desa) & CHA_VER_NUM_MASK; + aes_inst = aesa & CHA_VER_NUM_MASK; + md_inst = mdha & CHA_VER_NUM_MASK; + } /* If MD is present, limit digest size based on LP256 */ - if (md_inst && ((cha_vid & CHA_ID_LS_MD_MASK) == CHA_ID_LS_MD_LP256)) + if (md_inst && md_vid == CHA_VER_VID_MD_LP256) md_limit = SHA256_DIGEST_SIZE; for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { @@ -2556,8 +2576,7 @@ static int __init caam_qi_algapi_init(void) * Check support for AES algorithms not available * on LP devices. 
*/ - if (((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP) && - (alg_aai == OP_ALG_AAI_GCM)) + if (aes_vid == CHA_VER_VID_AES_LP && alg_aai == OP_ALG_AAI_GCM) continue; /* diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 7d8ac0222fa3..425d5d974613 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -462,7 +462,15 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->dst_nents = dst_nents; edesc->iv_dma = iv_dma; - edesc->assoclen = cpu_to_caam32(req->assoclen); + if ((alg->caam.class1_alg_type & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20 && ivsize != CHACHAPOLY_IV_SIZE) + /* + * The associated data comes already with the IV but we need + * to skip it when we authenticate or encrypt... + */ + edesc->assoclen = cpu_to_caam32(req->assoclen - ivsize); + else + edesc->assoclen = cpu_to_caam32(req->assoclen); edesc->assoclen_dma = dma_map_single(dev, &edesc->assoclen, 4, DMA_TO_DEVICE); if (dma_mapping_error(dev, edesc->assoclen_dma)) { @@ -532,6 +540,68 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } +static int chachapoly_set_sh_desc(struct crypto_aead *aead) +{ + struct caam_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + struct device *dev = ctx->dev; + struct caam_flc *flc; + u32 *desc; + + if (!ctx->cdata.keylen || !ctx->authsize) + return 0; + + flc = &ctx->flc[ENCRYPT]; + desc = flc->sh_desc; + cnstr_shdsc_chachapoly(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, true, true); + flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */ + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + ctx->dir); + + flc = &ctx->flc[DECRYPT]; + desc = flc->sh_desc; + cnstr_shdsc_chachapoly(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, false, true); + flc->flc[1] = cpu_to_caam32(desc_len(desc)); /* SDL */ + 
dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + ctx->dir); + + return 0; +} + +static int chachapoly_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct caam_ctx *ctx = crypto_aead_ctx(aead); + + if (authsize != POLY1305_DIGEST_SIZE) + return -EINVAL; + + ctx->authsize = authsize; + return chachapoly_set_sh_desc(aead); +} + +static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct caam_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize; + + if (keylen != CHACHA_KEY_SIZE + saltlen) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->cdata.key_virt = key; + ctx->cdata.keylen = keylen - saltlen; + + return chachapoly_set_sh_desc(aead); +} + static int gcm_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -816,7 +886,9 @@ static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, u32 *desc; u32 ctx1_iv_off = 0; const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == - OP_ALG_AAI_CTR_MOD128); + OP_ALG_AAI_CTR_MOD128) && + ((ctx->cdata.algtype & OP_ALG_ALGSEL_MASK) != + OP_ALG_ALGSEL_CHACHA20); const bool is_rfc3686 = alg->caam.rfc3686; print_hex_dump_debug("key in @" __stringify(__LINE__)": ", @@ -1494,7 +1566,23 @@ static struct caam_skcipher_alg driver_algs[] = { .ivsize = AES_BLOCK_SIZE, }, .caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS, - } + }, + { + .skcipher = { + .base = { + .cra_name = "chacha20", + .cra_driver_name = "chacha20-caam-qi2", + .cra_blocksize = 1, + }, + .setkey = skcipher_setkey, + .encrypt = skcipher_encrypt, + .decrypt = skcipher_decrypt, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + }, + .caam.class1_alg_type = OP_ALG_ALGSEL_CHACHA20, + }, }; static struct caam_aead_alg 
driver_aeads[] = { @@ -2608,6 +2696,50 @@ static struct caam_aead_alg driver_aeads[] = { .geniv = true, }, }, + { + .aead = { + .base = { + .cra_name = "rfc7539(chacha20,poly1305)", + .cra_driver_name = "rfc7539-chacha20-poly1305-" + "caam-qi2", + .cra_blocksize = 1, + }, + .setkey = chachapoly_setkey, + .setauthsize = chachapoly_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .ivsize = CHACHAPOLY_IV_SIZE, + .maxauthsize = POLY1305_DIGEST_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_CHACHA20 | + OP_ALG_AAI_AEAD, + .class2_alg_type = OP_ALG_ALGSEL_POLY1305 | + OP_ALG_AAI_AEAD, + }, + }, + { + .aead = { + .base = { + .cra_name = "rfc7539esp(chacha20,poly1305)", + .cra_driver_name = "rfc7539esp-chacha20-" + "poly1305-caam-qi2", + .cra_blocksize = 1, + }, + .setkey = chachapoly_setkey, + .setauthsize = chachapoly_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .ivsize = 8, + .maxauthsize = POLY1305_DIGEST_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_CHACHA20 | + OP_ALG_AAI_AEAD, + .class2_alg_type = OP_ALG_ALGSEL_POLY1305 | + OP_ALG_AAI_AEAD, + }, + }, { .aead = { .base = { @@ -4908,6 +5040,11 @@ static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev) alg_sel == OP_ALG_ALGSEL_AES) continue; + /* Skip CHACHA20 algorithms if not supported by device */ + if (alg_sel == OP_ALG_ALGSEL_CHACHA20 && + !priv->sec_attr.ccha_acc_num) + continue; + t_alg->caam.dev = dev; caam_skcipher_alg_init(t_alg); @@ -4940,11 +5077,22 @@ static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev) c1_alg_sel == OP_ALG_ALGSEL_AES) continue; + /* Skip CHACHA20 algorithms if not supported by device */ + if (c1_alg_sel == OP_ALG_ALGSEL_CHACHA20 && + !priv->sec_attr.ccha_acc_num) + continue; + + /* Skip POLY1305 algorithms if not supported by device */ + if (c2_alg_sel == OP_ALG_ALGSEL_POLY1305 && + !priv->sec_attr.ptha_acc_num) + continue; + /* * Skip algorithms requiring message digests * if MD not supported by device. 
*/ - if (!priv->sec_attr.md_acc_num && c2_alg_sel) + if ((c2_alg_sel & ~OP_ALG_ALGSEL_SUBMASK) == 0x40 && + !priv->sec_attr.md_acc_num) continue; t_alg->caam.dev = dev; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 46924affa0bd..81712aa5d0f2 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -3,6 +3,7 @@ * caam - Freescale FSL CAAM support for ahash functions of crypto API * * Copyright 2011 Freescale Semiconductor, Inc. + * Copyright 2018 NXP * * Based on caamalg.c crypto API driver. * @@ -1801,7 +1802,7 @@ static int __init caam_algapi_hash_init(void) int i = 0, err = 0; struct caam_drv_private *priv; unsigned int md_limit = SHA512_DIGEST_SIZE; - u32 cha_inst, cha_vid; + u32 md_inst, md_vid; dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); if (!dev_node) { @@ -1831,18 +1832,27 @@ static int __init caam_algapi_hash_init(void) * Register crypto algorithms the device supports. First, identify * presence and attributes of MD block. */ - cha_vid = rd_reg32(&priv->ctrl->perfmon.cha_id_ls); - cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + if (priv->era < 10) { + md_vid = (rd_reg32(&priv->ctrl->perfmon.cha_id_ls) & + CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + md_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) & + CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT; + } else { + u32 mdha = rd_reg32(&priv->ctrl->vreg.mdha); + + md_vid = (mdha & CHA_VER_VID_MASK) >> CHA_VER_VID_SHIFT; + md_inst = mdha & CHA_VER_NUM_MASK; + } /* * Skip registration of any hashing algorithms if MD block * is not present. 
*/ - if (!((cha_inst & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT)) + if (!md_inst) return -ENODEV; /* Limit digest size based on LP256 */ - if ((cha_vid & CHA_ID_LS_MD_MASK) == CHA_ID_LS_MD_LP256) + if (md_vid == CHA_VER_VID_MD_LP256) md_limit = SHA256_DIGEST_SIZE; INIT_LIST_HEAD(&hash_list); diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 4fc209cbbeab..77ab28a2811a 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -3,6 +3,7 @@ * caam - Freescale FSL CAAM support for Public Key Cryptography * * Copyright 2016 Freescale Semiconductor, Inc. + * Copyright 2018 NXP * * There is no Shared Descriptor for PKC so that the Job Descriptor must carry * all the desired key parameters, input and output pointers. @@ -1017,7 +1018,7 @@ static int __init caam_pkc_init(void) struct platform_device *pdev; struct device *ctrldev; struct caam_drv_private *priv; - u32 cha_inst, pk_inst; + u32 pk_inst; int err; dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); @@ -1045,8 +1046,11 @@ static int __init caam_pkc_init(void) return -ENODEV; /* Determine public key hardware accelerator presence. */ - cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); - pk_inst = (cha_inst & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + if (priv->era < 10) + pk_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) & + CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + else + pk_inst = rd_reg32(&priv->ctrl->vreg.pkha) & CHA_VER_NUM_MASK; /* Do not register algorithms if PKHA is not present. */ if (!pk_inst) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 4318b0aa6fb9..a387c8d49a62 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -3,6 +3,7 @@ * caam - Freescale FSL CAAM support for hw_random * * Copyright 2011 Freescale Semiconductor, Inc. + * Copyright 2018 NXP * * Based on caamalg.c crypto API driver. 
* @@ -309,6 +310,7 @@ static int __init caam_rng_init(void) struct platform_device *pdev; struct device *ctrldev; struct caam_drv_private *priv; + u32 rng_inst; int err; dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); @@ -336,7 +338,13 @@ static int __init caam_rng_init(void) return -ENODEV; /* Check for an instantiated RNG before registration */ - if (!(rd_reg32(&priv->ctrl->perfmon.cha_num_ls) & CHA_ID_LS_RNG_MASK)) + if (priv->era < 10) + rng_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) & + CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT; + else + rng_inst = rd_reg32(&priv->ctrl->vreg.rng) & CHA_VER_NUM_MASK; + + if (!rng_inst) return -ENODEV; dev = caam_jr_alloc(); diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index 9604ff7a335e..87d9efe4c7aa 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 3fc793193821..16bbc72f041a 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -3,6 +3,7 @@ * Controller-level driver, kernel property detection, initialization * * Copyright 2008-2012 Freescale Semiconductor, Inc. 
+ * Copyright 2018 NXP */ #include @@ -106,7 +107,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, struct caam_ctrl __iomem *ctrl = ctrlpriv->ctrl; struct caam_deco __iomem *deco = ctrlpriv->deco; unsigned int timeout = 100000; - u32 deco_dbg_reg, flags; + u32 deco_dbg_reg, deco_state, flags; int i; @@ -149,13 +150,22 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, timeout = 10000000; do { deco_dbg_reg = rd_reg32(&deco->desc_dbg); + + if (ctrlpriv->era < 10) + deco_state = (deco_dbg_reg & DESC_DBG_DECO_STAT_MASK) >> + DESC_DBG_DECO_STAT_SHIFT; + else + deco_state = (rd_reg32(&deco->dbg_exec) & + DESC_DER_DECO_STAT_MASK) >> + DESC_DER_DECO_STAT_SHIFT; + /* * If an error occured in the descriptor, then * the DECO status field will be set to 0x0D */ - if ((deco_dbg_reg & DESC_DBG_DECO_STAT_MASK) == - DESC_DBG_DECO_STAT_HOST_ERR) + if (deco_state == DECO_STAT_HOST_ERR) break; + cpu_relax(); } while ((deco_dbg_reg & DESC_DBG_DECO_STAT_VALID) && --timeout); @@ -491,7 +501,7 @@ static int caam_probe(struct platform_device *pdev) struct caam_perfmon *perfmon; #endif u32 scfgr, comp_params; - u32 cha_vid_ls; + u8 rng_vid; int pg_size; int BLOCK_OFFSET = 0; @@ -733,15 +743,19 @@ static int caam_probe(struct platform_device *pdev) goto caam_remove; } - cha_vid_ls = rd_reg32(&ctrl->perfmon.cha_id_ls); + if (ctrlpriv->era < 10) + rng_vid = (rd_reg32(&ctrl->perfmon.cha_id_ls) & + CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT; + else + rng_vid = (rd_reg32(&ctrl->vreg.rng) & CHA_VER_VID_MASK) >> + CHA_VER_VID_SHIFT; /* * If SEC has RNG version >= 4 and RNG state handle has not been * already instantiated, do RNG instantiation * In case of SoCs with Management Complex, RNG is managed by MC f/w. 
*/ - if (!ctrlpriv->mc_en && - (cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) { + if (!ctrlpriv->mc_en && rng_vid >= 4) { ctrlpriv->rng4_sh_init = rd_reg32(&ctrl->r4tst[0].rdsta); /* diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index f76ff160a02c..ec10230178c5 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -4,6 +4,7 @@ * Definitions to support CAAM descriptor instruction generation * * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2018 NXP */ #ifndef DESC_H @@ -242,6 +243,7 @@ #define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_INFO_FIFO_SM (0x71 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT) /* Offset in source/destination */ @@ -284,6 +286,12 @@ #define LDLEN_SET_OFIFO_OFFSET_SHIFT 0 #define LDLEN_SET_OFIFO_OFFSET_MASK (3 << LDLEN_SET_OFIFO_OFFSET_SHIFT) +/* Special Length definitions when dst=sm, nfifo-{sm,m} */ +#define LDLEN_MATH0 0 +#define LDLEN_MATH1 1 +#define LDLEN_MATH2 2 +#define LDLEN_MATH3 3 + /* * FIFO_LOAD/FIFO_STORE/SEQ_FIFO_LOAD/SEQ_FIFO_STORE * Command Constructs @@ -408,6 +416,7 @@ #define FIFOST_TYPE_MESSAGE_DATA (0x30 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_RNGSTORE (0x34 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_RNGFIFO (0x35 << FIFOST_TYPE_SHIFT) +#define FIFOST_TYPE_METADATA (0x3e << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_SKIP (0x3f << FIFOST_TYPE_SHIFT) /* @@ -1133,6 +1142,12 @@ #define OP_ALG_TYPE_CLASS1 (2 << OP_ALG_TYPE_SHIFT) #define OP_ALG_TYPE_CLASS2 (4 << OP_ALG_TYPE_SHIFT) +/* version register fields */ +#define OP_VER_CCHA_NUM 0x000000ff /* Number CCHAs instantiated */ +#define OP_VER_CCHA_MISC 0x0000ff00 /* CCHA Miscellaneous Information */ +#define OP_VER_CCHA_REV 0x00ff0000 /* CCHA Revision Number */ +#define OP_VER_CCHA_VID 
0xff000000 /* CCHA Version ID */ + #define OP_ALG_ALGSEL_SHIFT 16 #define OP_ALG_ALGSEL_MASK (0xff << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_SUBMASK (0x0f << OP_ALG_ALGSEL_SHIFT) @@ -1152,6 +1167,8 @@ #define OP_ALG_ALGSEL_KASUMI (0x70 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_CRC (0x90 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_SNOW_F9 (0xA0 << OP_ALG_ALGSEL_SHIFT) +#define OP_ALG_ALGSEL_CHACHA20 (0xD0 << OP_ALG_ALGSEL_SHIFT) +#define OP_ALG_ALGSEL_POLY1305 (0xE0 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_AAI_SHIFT 4 #define OP_ALG_AAI_MASK (0x1ff << OP_ALG_AAI_SHIFT) @@ -1199,6 +1216,11 @@ #define OP_ALG_AAI_RNG4_AI (0x80 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_RNG4_SK (0x100 << OP_ALG_AAI_SHIFT) +/* Chacha20 AAI set */ +#define OP_ALG_AAI_AEAD (0x002 << OP_ALG_AAI_SHIFT) +#define OP_ALG_AAI_KEYSTREAM (0x001 << OP_ALG_AAI_SHIFT) +#define OP_ALG_AAI_BC8 (0x008 << OP_ALG_AAI_SHIFT) + /* hmac/smac AAI set */ #define OP_ALG_AAI_HASH (0x00 << OP_ALG_AAI_SHIFT) #define OP_ALG_AAI_HMAC (0x01 << OP_ALG_AAI_SHIFT) @@ -1387,6 +1409,7 @@ #define MOVE_SRC_MATH3 (0x07 << MOVE_SRC_SHIFT) #define MOVE_SRC_INFIFO (0x08 << MOVE_SRC_SHIFT) #define MOVE_SRC_INFIFO_CL (0x09 << MOVE_SRC_SHIFT) +#define MOVE_SRC_AUX_ABLK (0x0a << MOVE_SRC_SHIFT) #define MOVE_DEST_SHIFT 16 #define MOVE_DEST_MASK (0x0f << MOVE_DEST_SHIFT) @@ -1413,6 +1436,10 @@ #define MOVELEN_MRSEL_SHIFT 0 #define MOVELEN_MRSEL_MASK (0x3 << MOVE_LEN_SHIFT) +#define MOVELEN_MRSEL_MATH0 (0 << MOVELEN_MRSEL_SHIFT) +#define MOVELEN_MRSEL_MATH1 (1 << MOVELEN_MRSEL_SHIFT) +#define MOVELEN_MRSEL_MATH2 (2 << MOVELEN_MRSEL_SHIFT) +#define MOVELEN_MRSEL_MATH3 (3 << MOVELEN_MRSEL_SHIFT) /* * MATH Command Constructs @@ -1589,6 +1616,7 @@ #define NFIFOENTRY_DTYPE_IV (0x2 << NFIFOENTRY_DTYPE_SHIFT) #define NFIFOENTRY_DTYPE_SAD (0x3 << NFIFOENTRY_DTYPE_SHIFT) #define NFIFOENTRY_DTYPE_ICV (0xA << NFIFOENTRY_DTYPE_SHIFT) +#define NFIFOENTRY_DTYPE_POLY (0xB << NFIFOENTRY_DTYPE_SHIFT) #define NFIFOENTRY_DTYPE_SKIP (0xE << 
NFIFOENTRY_DTYPE_SHIFT) #define NFIFOENTRY_DTYPE_MSG (0xF << NFIFOENTRY_DTYPE_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index d4256fa4a1d6..2980b8ef1fb1 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -189,6 +189,7 @@ static inline u32 *append_##cmd(u32 * const desc, u32 options) \ } APPEND_CMD_RET(jump, JUMP) APPEND_CMD_RET(move, MOVE) +APPEND_CMD_RET(move_len, MOVE_LEN) static inline void set_jump_tgt_here(u32 * const desc, u32 *jump_cmd) { @@ -327,7 +328,11 @@ static inline void append_##cmd##_imm_##type(u32 * const desc, type immediate, \ u32 options) \ { \ PRINT_POS; \ - append_cmd(desc, CMD_##op | IMMEDIATE | options | sizeof(type)); \ + if (options & LDST_LEN_MASK) \ + append_cmd(desc, CMD_##op | IMMEDIATE | options); \ + else \ + append_cmd(desc, CMD_##op | IMMEDIATE | options | \ + sizeof(type)); \ append_cmd(desc, immediate); \ } APPEND_CMD_RAW_IMM(load, LOAD, u32); diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 457815f965c0..3cd0822ea819 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -3,6 +3,7 @@ * CAAM hardware register-level view * * Copyright 2008-2011 Freescale Semiconductor, Inc. 
+ * Copyright 2018 NXP */ #ifndef REGS_H @@ -211,6 +212,47 @@ struct jr_outentry { u32 jrstatus; /* Status for completed descriptor */ } __packed; +/* Version registers (Era 10+) e80-eff */ +struct version_regs { + u32 crca; /* CRCA_VERSION */ + u32 afha; /* AFHA_VERSION */ + u32 kfha; /* KFHA_VERSION */ + u32 pkha; /* PKHA_VERSION */ + u32 aesa; /* AESA_VERSION */ + u32 mdha; /* MDHA_VERSION */ + u32 desa; /* DESA_VERSION */ + u32 snw8a; /* SNW8A_VERSION */ + u32 snw9a; /* SNW9A_VERSION */ + u32 zuce; /* ZUCE_VERSION */ + u32 zuca; /* ZUCA_VERSION */ + u32 ccha; /* CCHA_VERSION */ + u32 ptha; /* PTHA_VERSION */ + u32 rng; /* RNG_VERSION */ + u32 trng; /* TRNG_VERSION */ + u32 aaha; /* AAHA_VERSION */ + u32 rsvd[10]; + u32 sr; /* SR_VERSION */ + u32 dma; /* DMA_VERSION */ + u32 ai; /* AI_VERSION */ + u32 qi; /* QI_VERSION */ + u32 jr; /* JR_VERSION */ + u32 deco; /* DECO_VERSION */ +}; + +/* Version registers bitfields */ + +/* Number of CHAs instantiated */ +#define CHA_VER_NUM_MASK 0xffull +/* CHA Miscellaneous Information */ +#define CHA_VER_MISC_SHIFT 8 +#define CHA_VER_MISC_MASK (0xffull << CHA_VER_MISC_SHIFT) +/* CHA Revision Number */ +#define CHA_VER_REV_SHIFT 16 +#define CHA_VER_REV_MASK (0xffull << CHA_VER_REV_SHIFT) +/* CHA Version ID */ +#define CHA_VER_VID_SHIFT 24 +#define CHA_VER_VID_MASK (0xffull << CHA_VER_VID_SHIFT) + /* * caam_perfmon - Performance Monitor/Secure Memory Status/ * CAAM Global Status/Component Version IDs @@ -223,15 +265,13 @@ struct jr_outentry { #define CHA_NUM_MS_DECONUM_MASK (0xfull << CHA_NUM_MS_DECONUM_SHIFT) /* - * CHA version IDs / instantiation bitfields + * CHA version IDs / instantiation bitfields (< Era 10) * Defined for use with the cha_id fields in perfmon, but the same shift/mask * selectors can be used to pull out the number of instantiated blocks within * cha_num fields in perfmon because the locations are the same. 
*/ #define CHA_ID_LS_AES_SHIFT 0 #define CHA_ID_LS_AES_MASK (0xfull << CHA_ID_LS_AES_SHIFT) -#define CHA_ID_LS_AES_LP (0x3ull << CHA_ID_LS_AES_SHIFT) -#define CHA_ID_LS_AES_HP (0x4ull << CHA_ID_LS_AES_SHIFT) #define CHA_ID_LS_DES_SHIFT 4 #define CHA_ID_LS_DES_MASK (0xfull << CHA_ID_LS_DES_SHIFT) @@ -241,9 +281,6 @@ struct jr_outentry { #define CHA_ID_LS_MD_SHIFT 12 #define CHA_ID_LS_MD_MASK (0xfull << CHA_ID_LS_MD_SHIFT) -#define CHA_ID_LS_MD_LP256 (0x0ull << CHA_ID_LS_MD_SHIFT) -#define CHA_ID_LS_MD_LP512 (0x1ull << CHA_ID_LS_MD_SHIFT) -#define CHA_ID_LS_MD_HP (0x2ull << CHA_ID_LS_MD_SHIFT) #define CHA_ID_LS_RNG_SHIFT 16 #define CHA_ID_LS_RNG_MASK (0xfull << CHA_ID_LS_RNG_SHIFT) @@ -269,6 +306,13 @@ struct jr_outentry { #define CHA_ID_MS_JR_SHIFT 28 #define CHA_ID_MS_JR_MASK (0xfull << CHA_ID_MS_JR_SHIFT) +/* Specific CHA version IDs */ +#define CHA_VER_VID_AES_LP 0x3ull +#define CHA_VER_VID_AES_HP 0x4ull +#define CHA_VER_VID_MD_LP256 0x0ull +#define CHA_VER_VID_MD_LP512 0x1ull +#define CHA_VER_VID_MD_HP 0x2ull + struct sec_vid { u16 ip_id; u8 maj_rev; @@ -479,8 +523,10 @@ struct caam_ctrl { struct rng4tst r4tst[2]; }; - u32 rsvd9[448]; + u32 rsvd9[416]; + /* Version registers - introduced with era 10 e80-eff */ + struct version_regs vreg; /* Performance Monitor f00-fff */ struct caam_perfmon perfmon; }; @@ -570,8 +616,10 @@ struct caam_job_ring { u32 rsvd11; u32 jrcommand; /* JRCRx - JobR command */ - u32 rsvd12[932]; + u32 rsvd12[900]; + /* Version registers - introduced with era 10 e80-eff */ + struct version_regs vreg; /* Performance Monitor f00-fff */ struct caam_perfmon perfmon; }; @@ -878,13 +926,19 @@ struct caam_deco { u32 rsvd29[48]; u32 descbuf[64]; /* DxDESB - Descriptor buffer */ u32 rscvd30[193]; -#define DESC_DBG_DECO_STAT_HOST_ERR 0x00D00000 #define DESC_DBG_DECO_STAT_VALID 0x80000000 #define DESC_DBG_DECO_STAT_MASK 0x00F00000 +#define DESC_DBG_DECO_STAT_SHIFT 20 u32 desc_dbg; /* DxDDR - DECO Debug Register */ - u32 rsvd31[126]; + u32 rsvd31[13]; 
+#define DESC_DER_DECO_STAT_MASK 0x000F0000 +#define DESC_DER_DECO_STAT_SHIFT 16 + u32 dbg_exec; /* DxDER - DECO Debug Exec Register */ + u32 rsvd32[112]; }; +#define DECO_STAT_HOST_ERR 0xD + #define DECO_JQCR_WHL 0x20000000 #define DECO_JQCR_FOUR 0x10000000 diff --git a/drivers/crypto/cavium/nitrox/Makefile b/drivers/crypto/cavium/nitrox/Makefile index e12954791673..f83991aaf820 100644 --- a/drivers/crypto/cavium/nitrox/Makefile +++ b/drivers/crypto/cavium/nitrox/Makefile @@ -6,7 +6,10 @@ n5pf-objs := nitrox_main.o \ nitrox_lib.o \ nitrox_hal.o \ nitrox_reqmgr.o \ - nitrox_algs.o + nitrox_algs.o \ + nitrox_mbx.o \ + nitrox_skcipher.o \ + nitrox_aead.o n5pf-$(CONFIG_PCI_IOV) += nitrox_sriov.o n5pf-$(CONFIG_DEBUG_FS) += nitrox_debugfs.o diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c new file mode 100644 index 000000000000..4f43eacd2557 --- /dev/null +++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "nitrox_dev.h" +#include "nitrox_common.h" +#include "nitrox_req.h" + +#define GCM_AES_SALT_SIZE 4 + +/** + * struct nitrox_crypt_params - Params to set nitrox crypto request. 
+ * @cryptlen: Encryption/Decryption data length + * @authlen: Assoc data length + Cryptlen + * @srclen: Input buffer length + * @dstlen: Output buffer length + * @iv: IV data + * @ivsize: IV data length + * @ctrl_arg: Identifies the request type (ENCRYPT/DECRYPT) + */ +struct nitrox_crypt_params { + unsigned int cryptlen; + unsigned int authlen; + unsigned int srclen; + unsigned int dstlen; + u8 *iv; + int ivsize; + u8 ctrl_arg; +}; + +union gph_p3 { + struct { +#ifdef __BIG_ENDIAN_BITFIELD + u16 iv_offset : 8; + u16 auth_offset : 8; +#else + u16 auth_offset : 8; + u16 iv_offset : 8; +#endif + }; + u16 param; +}; + +static int nitrox_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + int aes_keylen; + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct flexi_crypto_context *fctx; + union fc_ctx_flags flags; + + aes_keylen = flexi_aes_keylen(keylen); + if (aes_keylen < 0) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* fill crypto context */ + fctx = nctx->u.fctx; + flags.f = be64_to_cpu(fctx->flags.f); + flags.w0.aes_keylen = aes_keylen; + fctx->flags.f = cpu_to_be64(flags.f); + + /* copy enc key to context */ + memset(&fctx->crypto, 0, sizeof(fctx->crypto)); + memcpy(fctx->crypto.u.key, key, keylen); + + return 0; +} + +static int nitrox_aead_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct flexi_crypto_context *fctx = nctx->u.fctx; + union fc_ctx_flags flags; + + flags.f = be64_to_cpu(fctx->flags.f); + flags.w0.mac_len = authsize; + fctx->flags.f = cpu_to_be64(flags.f); + + aead->authsize = authsize; + + return 0; +} + +static int alloc_src_sglist(struct aead_request *areq, char *iv, int ivsize, + int buflen) +{ + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + int nents = sg_nents_for_len(areq->src, buflen) + 1; + int ret; + + if (nents < 0) + return nents; + + /* Allocate 
buffer to hold IV and input scatterlist array */ + ret = alloc_src_req_buf(nkreq, nents, ivsize); + if (ret) + return ret; + + nitrox_creq_copy_iv(nkreq->src, iv, ivsize); + nitrox_creq_set_src_sg(nkreq, nents, ivsize, areq->src, buflen); + + return 0; +} + +static int alloc_dst_sglist(struct aead_request *areq, int ivsize, int buflen) +{ + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + int nents = sg_nents_for_len(areq->dst, buflen) + 3; + int ret; + + if (nents < 0) + return nents; + + /* Allocate buffer to hold ORH, COMPLETION and output scatterlist + * array + */ + ret = alloc_dst_req_buf(nkreq, nents); + if (ret) + return ret; + + nitrox_creq_set_orh(nkreq); + nitrox_creq_set_comp(nkreq); + nitrox_creq_set_dst_sg(nkreq, nents, ivsize, areq->dst, buflen); + + return 0; +} + +static void free_src_sglist(struct aead_request *areq) +{ + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + + kfree(nkreq->src); +} + +static void free_dst_sglist(struct aead_request *areq) +{ + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + + kfree(nkreq->dst); +} + +static int nitrox_set_creq(struct aead_request *areq, + struct nitrox_crypt_params *params) +{ + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + struct se_crypto_request *creq = &nkreq->creq; + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + union gph_p3 param3; + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + int ret; + + creq->flags = areq->base.flags; + creq->gfp = (areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + + creq->ctrl.value = 0; + creq->opcode = FLEXI_CRYPTO_ENCRYPT_HMAC; + creq->ctrl.s.arg = params->ctrl_arg; + + creq->gph.param0 = cpu_to_be16(params->cryptlen); + creq->gph.param1 = cpu_to_be16(params->authlen); + creq->gph.param2 = cpu_to_be16(params->ivsize + areq->assoclen); + param3.iv_offset = 0; + param3.auth_offset = params->ivsize; + creq->gph.param3 = cpu_to_be16(param3.param); + + creq->ctx_handle = nctx->u.ctx_handle; + creq->ctrl.s.ctxl = sizeof(struct flexi_crypto_context); + + ret = alloc_src_sglist(areq, params->iv, params->ivsize, + params->srclen); + if (ret) + return ret; + + ret = alloc_dst_sglist(areq, params->ivsize, params->dstlen); + if (ret) { + free_src_sglist(areq); + return ret; + } + + return 0; +} + +static void nitrox_aead_callback(void *arg, int err) +{ + struct aead_request *areq = arg; + + free_src_sglist(areq); + free_dst_sglist(areq); + if (err) { + pr_err_ratelimited("request failed status 0x%0x\n", err); + err = -EINVAL; + } + + areq->base.complete(&areq->base, err); +} + +static int nitrox_aes_gcm_enc(struct aead_request *areq) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + struct se_crypto_request *creq = &nkreq->creq; + struct flexi_crypto_context *fctx = nctx->u.fctx; + struct nitrox_crypt_params params; + int ret; + + memcpy(fctx->crypto.iv, areq->iv, GCM_AES_SALT_SIZE); + + memset(&params, 0, sizeof(params)); + params.cryptlen = areq->cryptlen; + params.authlen = areq->assoclen + params.cryptlen; + params.srclen = params.authlen; + params.dstlen = params.srclen + aead->authsize; + params.iv = &areq->iv[GCM_AES_SALT_SIZE]; + params.ivsize = GCM_AES_IV_SIZE - GCM_AES_SALT_SIZE; + params.ctrl_arg = ENCRYPT; + ret = nitrox_set_creq(areq, &params); + if (ret) + return ret; + + /* send the crypto request */ + return nitrox_process_se_request(nctx->ndev, creq, 
nitrox_aead_callback, + areq); +} + +static int nitrox_aes_gcm_dec(struct aead_request *areq) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct nitrox_kcrypt_request *nkreq = aead_request_ctx(areq); + struct se_crypto_request *creq = &nkreq->creq; + struct flexi_crypto_context *fctx = nctx->u.fctx; + struct nitrox_crypt_params params; + int ret; + + memcpy(fctx->crypto.iv, areq->iv, GCM_AES_SALT_SIZE); + + memset(&params, 0, sizeof(params)); + params.cryptlen = areq->cryptlen - aead->authsize; + params.authlen = areq->assoclen + params.cryptlen; + params.srclen = areq->cryptlen + areq->assoclen; + params.dstlen = params.srclen - aead->authsize; + params.iv = &areq->iv[GCM_AES_SALT_SIZE]; + params.ivsize = GCM_AES_IV_SIZE - GCM_AES_SALT_SIZE; + params.ctrl_arg = DECRYPT; + ret = nitrox_set_creq(areq, &params); + if (ret) + return ret; + + /* send the crypto request */ + return nitrox_process_se_request(nctx->ndev, creq, nitrox_aead_callback, + areq); +} + +static int nitrox_aead_init(struct crypto_aead *aead) +{ + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct crypto_ctx_hdr *chdr; + + /* get the first device */ + nctx->ndev = nitrox_get_first_device(); + if (!nctx->ndev) + return -ENODEV; + + /* allocate nitrox crypto context */ + chdr = crypto_alloc_context(nctx->ndev); + if (!chdr) { + nitrox_put_device(nctx->ndev); + return -ENOMEM; + } + nctx->chdr = chdr; + nctx->u.ctx_handle = (uintptr_t)((u8 *)chdr->vaddr + + sizeof(struct ctx_hdr)); + nctx->u.fctx->flags.f = 0; + + return 0; +} + +static int nitrox_aes_gcm_init(struct crypto_aead *aead) +{ + int ret; + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + union fc_ctx_flags *flags; + + ret = nitrox_aead_init(aead); + if (ret) + return ret; + + flags = &nctx->u.fctx->flags; + flags->w0.cipher_type = CIPHER_AES_GCM; + flags->w0.hash_type = AUTH_NULL; + flags->w0.iv_source = IV_FROM_DPTR; + /* ask microcode to calculate 
ipad/opad */ + flags->w0.auth_input_type = 1; + flags->f = be64_to_cpu(flags->f); + + crypto_aead_set_reqsize(aead, sizeof(struct aead_request) + + sizeof(struct nitrox_kcrypt_request)); + + return 0; +} + +static void nitrox_aead_exit(struct crypto_aead *aead) +{ + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + + /* free the nitrox crypto context */ + if (nctx->u.ctx_handle) { + struct flexi_crypto_context *fctx = nctx->u.fctx; + + memzero_explicit(&fctx->crypto, sizeof(struct crypto_keys)); + memzero_explicit(&fctx->auth, sizeof(struct auth_keys)); + crypto_free_context((void *)nctx->chdr); + } + nitrox_put_device(nctx->ndev); + + nctx->u.ctx_handle = 0; + nctx->ndev = NULL; +} + +static struct aead_alg nitrox_aeads[] = { { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "n5_aes_gcm", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .setkey = nitrox_aes_gcm_setkey, + .setauthsize = nitrox_aead_setauthsize, + .encrypt = nitrox_aes_gcm_enc, + .decrypt = nitrox_aes_gcm_dec, + .init = nitrox_aes_gcm_init, + .exit = nitrox_aead_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, +} }; + +int nitrox_register_aeads(void) +{ + return crypto_register_aeads(nitrox_aeads, ARRAY_SIZE(nitrox_aeads)); +} + +void nitrox_unregister_aeads(void) +{ + crypto_unregister_aeads(nitrox_aeads, ARRAY_SIZE(nitrox_aeads)); +} diff --git a/drivers/crypto/cavium/nitrox/nitrox_algs.c b/drivers/crypto/cavium/nitrox/nitrox_algs.c index 2ae6124e5da6..d646ae5f29b0 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_algs.c +++ b/drivers/crypto/cavium/nitrox/nitrox_algs.c @@ -1,458 +1,24 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "nitrox_dev.h" #include "nitrox_common.h" -#include "nitrox_req.h" - -#define PRIO 4001 
- -struct nitrox_cipher { - const char *name; - enum flexi_cipher value; -}; - -/** - * supported cipher list - */ -static const struct nitrox_cipher flexi_cipher_table[] = { - { "null", CIPHER_NULL }, - { "cbc(des3_ede)", CIPHER_3DES_CBC }, - { "ecb(des3_ede)", CIPHER_3DES_ECB }, - { "cbc(aes)", CIPHER_AES_CBC }, - { "ecb(aes)", CIPHER_AES_ECB }, - { "cfb(aes)", CIPHER_AES_CFB }, - { "rfc3686(ctr(aes))", CIPHER_AES_CTR }, - { "xts(aes)", CIPHER_AES_XTS }, - { "cts(cbc(aes))", CIPHER_AES_CBC_CTS }, - { NULL, CIPHER_INVALID } -}; - -static enum flexi_cipher flexi_cipher_type(const char *name) -{ - const struct nitrox_cipher *cipher = flexi_cipher_table; - - while (cipher->name) { - if (!strcmp(cipher->name, name)) - break; - cipher++; - } - return cipher->value; -} - -static int flexi_aes_keylen(int keylen) -{ - int aes_keylen; - - switch (keylen) { - case AES_KEYSIZE_128: - aes_keylen = 1; - break; - case AES_KEYSIZE_192: - aes_keylen = 2; - break; - case AES_KEYSIZE_256: - aes_keylen = 3; - break; - default: - aes_keylen = -EINVAL; - break; - } - return aes_keylen; -} - -static int nitrox_skcipher_init(struct crypto_skcipher *tfm) -{ - struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm); - void *fctx; - - /* get the first device */ - nctx->ndev = nitrox_get_first_device(); - if (!nctx->ndev) - return -ENODEV; - - /* allocate nitrox crypto context */ - fctx = crypto_alloc_context(nctx->ndev); - if (!fctx) { - nitrox_put_device(nctx->ndev); - return -ENOMEM; - } - nctx->u.ctx_handle = (uintptr_t)fctx; - crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(tfm) + - sizeof(struct nitrox_kcrypt_request)); - return 0; -} - -static void nitrox_skcipher_exit(struct crypto_skcipher *tfm) -{ - struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm); - - /* free the nitrox crypto context */ - if (nctx->u.ctx_handle) { - struct flexi_crypto_context *fctx = nctx->u.fctx; - - memset(&fctx->crypto, 0, sizeof(struct crypto_keys)); - memset(&fctx->auth, 0, 
sizeof(struct auth_keys)); - crypto_free_context((void *)fctx); - } - nitrox_put_device(nctx->ndev); - - nctx->u.ctx_handle = 0; - nctx->ndev = NULL; -} - -static inline int nitrox_skcipher_setkey(struct crypto_skcipher *cipher, - int aes_keylen, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); - struct flexi_crypto_context *fctx; - enum flexi_cipher cipher_type; - const char *name; - - name = crypto_tfm_alg_name(tfm); - cipher_type = flexi_cipher_type(name); - if (unlikely(cipher_type == CIPHER_INVALID)) { - pr_err("unsupported cipher: %s\n", name); - return -EINVAL; - } - - /* fill crypto context */ - fctx = nctx->u.fctx; - fctx->flags = 0; - fctx->w0.cipher_type = cipher_type; - fctx->w0.aes_keylen = aes_keylen; - fctx->w0.iv_source = IV_FROM_DPTR; - fctx->flags = cpu_to_be64(*(u64 *)&fctx->w0); - /* copy the key to context */ - memcpy(fctx->crypto.u.key, key, keylen); - - return 0; -} - -static int nitrox_aes_setkey(struct crypto_skcipher *cipher, const u8 *key, - unsigned int keylen) -{ - int aes_keylen; - - aes_keylen = flexi_aes_keylen(keylen); - if (aes_keylen < 0) { - crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen); -} - -static void nitrox_skcipher_callback(struct skcipher_request *skreq, - int err) -{ - if (err) { - pr_err_ratelimited("request failed status 0x%0x\n", err); - err = -EINVAL; - } - skcipher_request_complete(skreq, err); -} - -static int nitrox_skcipher_crypt(struct skcipher_request *skreq, bool enc) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq); - struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher); - struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq); - int ivsize = crypto_skcipher_ivsize(cipher); - struct se_crypto_request *creq; - - creq = &nkreq->creq; - creq->flags = skreq->base.flags; - 
creq->gfp = (skreq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; - - /* fill the request */ - creq->ctrl.value = 0; - creq->opcode = FLEXI_CRYPTO_ENCRYPT_HMAC; - creq->ctrl.s.arg = (enc ? ENCRYPT : DECRYPT); - /* param0: length of the data to be encrypted */ - creq->gph.param0 = cpu_to_be16(skreq->cryptlen); - creq->gph.param1 = 0; - /* param2: encryption data offset */ - creq->gph.param2 = cpu_to_be16(ivsize); - creq->gph.param3 = 0; - - creq->ctx_handle = nctx->u.ctx_handle; - creq->ctrl.s.ctxl = sizeof(struct flexi_crypto_context); - - /* copy the iv */ - memcpy(creq->iv, skreq->iv, ivsize); - creq->ivsize = ivsize; - creq->src = skreq->src; - creq->dst = skreq->dst; - - nkreq->nctx = nctx; - nkreq->skreq = skreq; - - /* send the crypto request */ - return nitrox_process_se_request(nctx->ndev, creq, - nitrox_skcipher_callback, skreq); -} - -static int nitrox_aes_encrypt(struct skcipher_request *skreq) -{ - return nitrox_skcipher_crypt(skreq, true); -} - -static int nitrox_aes_decrypt(struct skcipher_request *skreq) -{ - return nitrox_skcipher_crypt(skreq, false); -} - -static int nitrox_3des_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - if (keylen != DES3_EDE_KEY_SIZE) { - crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return nitrox_skcipher_setkey(cipher, 0, key, keylen); -} - -static int nitrox_3des_encrypt(struct skcipher_request *skreq) -{ - return nitrox_skcipher_crypt(skreq, true); -} - -static int nitrox_3des_decrypt(struct skcipher_request *skreq) -{ - return nitrox_skcipher_crypt(skreq, false); -} - -static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); - struct flexi_crypto_context *fctx; - int aes_keylen, ret; - - ret = xts_check_key(tfm, key, keylen); - if (ret) - return ret; - - keylen /= 
2; - - aes_keylen = flexi_aes_keylen(keylen); - if (aes_keylen < 0) { - crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - fctx = nctx->u.fctx; - /* copy KEY2 */ - memcpy(fctx->auth.u.key2, (key + keylen), keylen); - - return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen); -} - -static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); - struct flexi_crypto_context *fctx; - int aes_keylen; - - if (keylen < CTR_RFC3686_NONCE_SIZE) - return -EINVAL; - - fctx = nctx->u.fctx; - - memcpy(fctx->crypto.iv, key + (keylen - CTR_RFC3686_NONCE_SIZE), - CTR_RFC3686_NONCE_SIZE); - - keylen -= CTR_RFC3686_NONCE_SIZE; - - aes_keylen = flexi_aes_keylen(keylen); - if (aes_keylen < 0) { - crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen); -} - -static struct skcipher_alg nitrox_skciphers[] = { { - .base = { - .cra_name = "cbc(aes)", - .cra_driver_name = "n5_cbc(aes)", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = nitrox_aes_setkey, - .encrypt = nitrox_aes_encrypt, - .decrypt = nitrox_aes_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -}, { - .base = { - .cra_name = "ecb(aes)", - .cra_driver_name = "n5_ecb(aes)", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 
AES_BLOCK_SIZE, - .setkey = nitrox_aes_setkey, - .encrypt = nitrox_aes_encrypt, - .decrypt = nitrox_aes_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -}, { - .base = { - .cra_name = "cfb(aes)", - .cra_driver_name = "n5_cfb(aes)", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = nitrox_aes_setkey, - .encrypt = nitrox_aes_encrypt, - .decrypt = nitrox_aes_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -}, { - .base = { - .cra_name = "xts(aes)", - .cra_driver_name = "n5_xts(aes)", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = nitrox_aes_xts_setkey, - .encrypt = nitrox_aes_encrypt, - .decrypt = nitrox_aes_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -}, { - .base = { - .cra_name = "rfc3686(ctr(aes))", - .cra_driver_name = "n5_rfc3686(ctr(aes))", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, - .ivsize = CTR_RFC3686_IV_SIZE, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, - .setkey = nitrox_aes_ctr_rfc3686_setkey, - .encrypt = nitrox_aes_encrypt, - .decrypt = nitrox_aes_decrypt, -}, { - .base = { - .cra_name = "cts(cbc(aes))", - .cra_driver_name = "n5_cts(cbc(aes))", - .cra_priority = PRIO, - 
.cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = nitrox_aes_setkey, - .encrypt = nitrox_aes_encrypt, - .decrypt = nitrox_aes_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -}, { - .base = { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "n5_cbc(des3_ede)", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = nitrox_3des_setkey, - .encrypt = nitrox_3des_encrypt, - .decrypt = nitrox_3des_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -}, { - .base = { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "n5_ecb(des3_ede)", - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - }, - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = nitrox_3des_setkey, - .encrypt = nitrox_3des_encrypt, - .decrypt = nitrox_3des_decrypt, - .init = nitrox_skcipher_init, - .exit = nitrox_skcipher_exit, -} - -}; int nitrox_crypto_register(void) { - return crypto_register_skciphers(nitrox_skciphers, - ARRAY_SIZE(nitrox_skciphers)); + int err; + + err = nitrox_register_skciphers(); + if (err) + return err; + + err = nitrox_register_aeads(); + if (err) { + nitrox_unregister_skciphers(); + return err; + } + + return 0; } void nitrox_crypto_unregister(void) { - 
crypto_unregister_skciphers(nitrox_skciphers, - ARRAY_SIZE(nitrox_skciphers)); + nitrox_unregister_aeads(); + nitrox_unregister_skciphers(); } diff --git a/drivers/crypto/cavium/nitrox/nitrox_common.h b/drivers/crypto/cavium/nitrox/nitrox_common.h index 863143a8336b..e4be69d7e6e5 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_common.h +++ b/drivers/crypto/cavium/nitrox/nitrox_common.h @@ -7,6 +7,10 @@ int nitrox_crypto_register(void); void nitrox_crypto_unregister(void); +int nitrox_register_aeads(void); +void nitrox_unregister_aeads(void); +int nitrox_register_skciphers(void); +void nitrox_unregister_skciphers(void); void *crypto_alloc_context(struct nitrox_device *ndev); void crypto_free_context(void *ctx); struct nitrox_device *nitrox_get_first_device(void); @@ -19,7 +23,7 @@ void pkt_slc_resp_tasklet(unsigned long data); int nitrox_process_se_request(struct nitrox_device *ndev, struct se_crypto_request *req, completion_t cb, - struct skcipher_request *skreq); + void *cb_arg); void backlog_qflush_work(struct work_struct *work); diff --git a/drivers/crypto/cavium/nitrox/nitrox_csr.h b/drivers/crypto/cavium/nitrox/nitrox_csr.h index 1ad27b1a87c5..a2a452642b38 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_csr.h +++ b/drivers/crypto/cavium/nitrox/nitrox_csr.h @@ -54,7 +54,13 @@ #define NPS_STATS_PKT_DMA_WR_CNT 0x1000190 /* NPS packet registers */ -#define NPS_PKT_INT 0x1040018 +#define NPS_PKT_INT 0x1040018 +#define NPS_PKT_MBOX_INT_LO 0x1040020 +#define NPS_PKT_MBOX_INT_LO_ENA_W1C 0x1040030 +#define NPS_PKT_MBOX_INT_LO_ENA_W1S 0x1040038 +#define NPS_PKT_MBOX_INT_HI 0x1040040 +#define NPS_PKT_MBOX_INT_HI_ENA_W1C 0x1040050 +#define NPS_PKT_MBOX_INT_HI_ENA_W1S 0x1040058 #define NPS_PKT_IN_RERR_HI 0x1040108 #define NPS_PKT_IN_RERR_HI_ENA_W1S 0x1040120 #define NPS_PKT_IN_RERR_LO 0x1040128 @@ -74,6 +80,10 @@ #define NPS_PKT_SLC_RERR_LO_ENA_W1S 0x1040240 #define NPS_PKT_SLC_ERR_TYPE 0x1040248 #define NPS_PKT_SLC_ERR_TYPE_ENA_W1S 0x1040260 +/* Mailbox PF->VF PF 
Accessible Data registers */ +#define NPS_PKT_MBOX_PF_VF_PFDATAX(_i) (0x1040800 + ((_i) * 0x8)) +#define NPS_PKT_MBOX_VF_PF_PFDATAX(_i) (0x1040C00 + ((_i) * 0x8)) + #define NPS_PKT_SLC_CTLX(_i) (0x10000 + ((_i) * 0x40000)) #define NPS_PKT_SLC_CNTSX(_i) (0x10008 + ((_i) * 0x40000)) #define NPS_PKT_SLC_INT_LEVELSX(_i) (0x10010 + ((_i) * 0x40000)) diff --git a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c index 5f3cd5fafe04..0196b992280f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c +++ b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c @@ -13,18 +13,7 @@ static int firmware_show(struct seq_file *s, void *v) return 0; } -static int firmware_open(struct inode *inode, struct file *file) -{ - return single_open(file, firmware_show, inode->i_private); -} - -static const struct file_operations firmware_fops = { - .owner = THIS_MODULE, - .open = firmware_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(firmware); static int device_show(struct seq_file *s, void *v) { @@ -41,18 +30,7 @@ static int device_show(struct seq_file *s, void *v) return 0; } -static int nitrox_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_show, inode->i_private); -} - -static const struct file_operations nitrox_fops = { - .owner = THIS_MODULE, - .open = nitrox_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(device); static int stats_show(struct seq_file *s, void *v) { @@ -69,18 +47,7 @@ static int stats_show(struct seq_file *s, void *v) return 0; } -static int nitrox_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, stats_show, inode->i_private); -} - -static const struct file_operations nitrox_stats_fops = { - .owner = THIS_MODULE, - .open = nitrox_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(stats); 
void nitrox_debugfs_exit(struct nitrox_device *ndev) { @@ -97,13 +64,16 @@ int nitrox_debugfs_init(struct nitrox_device *ndev) return -ENOMEM; ndev->debugfs_dir = dir; - f = debugfs_create_file("firmware", 0400, dir, ndev, &firmware_fops); + f = debugfs_create_file("firmware", 0400, dir, ndev, + &firmware_fops); if (!f) goto err; - f = debugfs_create_file("device", 0400, dir, ndev, &nitrox_fops); + f = debugfs_create_file("device", 0400, dir, ndev, + &device_fops); if (!f) goto err; - f = debugfs_create_file("stats", 0400, dir, ndev, &nitrox_stats_fops); + f = debugfs_create_file("stats", 0400, dir, ndev, + &stats_fops); if (!f) goto err; diff --git a/drivers/crypto/cavium/nitrox/nitrox_debugfs.h b/drivers/crypto/cavium/nitrox/nitrox_debugfs.h new file mode 100644 index 000000000000..a8d85ffa619c --- /dev/null +++ b/drivers/crypto/cavium/nitrox/nitrox_debugfs.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __NITROX_DEBUGFS_H +#define __NITROX_DEBUGFS_H + +#include "nitrox_dev.h" + +#ifdef CONFIG_DEBUG_FS +int nitrox_debugfs_init(struct nitrox_device *ndev); +void nitrox_debugfs_exit(struct nitrox_device *ndev); +#else +static inline int nitrox_debugfs_init(struct nitrox_device *ndev) +{ + return 0; +} + +static inline void nitrox_debugfs_exit(struct nitrox_device *ndev) +{ +} +#endif /* !CONFIG_DEBUG_FS */ + +#endif /* __NITROX_DEBUGFS_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h index 283e252385fb..0338877b828f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_dev.h +++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h @@ -8,6 +8,8 @@ #include #define VERSION_LEN 32 +/* Maximum queues in PF mode */ +#define MAX_PF_QUEUES 64 /** * struct nitrox_cmdq - NITROX command queue @@ -103,6 +105,61 @@ struct nitrox_q_vector { }; }; +/** + * mbox_msg - Mailbox message data + * @type: message type + * @opcode: message opcode + * @data: message data + */ +union mbox_msg { + u64 value; + struct { + u64 type: 
2; + u64 opcode: 6; + u64 data: 58; + }; + struct { + u64 type: 2; + u64 opcode: 6; + u64 chipid: 8; + u64 vfid: 8; + } id; +}; + +/** + * nitrox_vfdev - NITROX VF device instance in PF + * @state: VF device state + * @vfno: VF number + * @nr_queues: number of queues enabled in VF + * @ring: ring to communicate with VF + * @msg: Mailbox message data from VF + * @mbx_resp: Mailbox counters + */ +struct nitrox_vfdev { + atomic_t state; + int vfno; + int nr_queues; + int ring; + union mbox_msg msg; + atomic64_t mbx_resp; +}; + +/** + * struct nitrox_iov - SR-IOV information + * @num_vfs: number of VF(s) enabled + * @max_vf_queues: Maximum number of queues allowed for VF + * @vfdev: VF(s) devices + * @pf2vf_wq: workqueue for PF2VF communication + * @msix: MSI-X entry for PF in SR-IOV case + */ +struct nitrox_iov { + int num_vfs; + int max_vf_queues; + struct nitrox_vfdev *vfdev; + struct workqueue_struct *pf2vf_wq; + struct msix_entry msix; +}; + /* * NITROX Device states */ @@ -150,6 +207,9 @@ enum vf_mode { * @ctx_pool: DMA pool for crypto context * @pkt_inq: Packet input rings * @qvec: MSI-X queue vectors information + * @iov: SR-IOV informatin + * @num_vecs: number of MSI-X vectors + * @stats: request statistics * @hw: hardware information * @debugfs_dir: debugfs directory */ @@ -168,13 +228,13 @@ struct nitrox_device { int node; u16 qlen; u16 nr_queues; - int num_vfs; enum vf_mode mode; struct dma_pool *ctx_pool; struct nitrox_cmdq *pkt_inq; struct nitrox_q_vector *qvec; + struct nitrox_iov iov; int num_vecs; struct nitrox_stats stats; @@ -213,17 +273,9 @@ static inline bool nitrox_ready(struct nitrox_device *ndev) return atomic_read(&ndev->state) == __NDEV_READY; } -#ifdef CONFIG_DEBUG_FS -int nitrox_debugfs_init(struct nitrox_device *ndev); -void nitrox_debugfs_exit(struct nitrox_device *ndev); -#else -static inline int nitrox_debugfs_init(struct nitrox_device *ndev) +static inline bool nitrox_vfdev_ready(struct nitrox_vfdev *vfdev) { - return 0; + return 
atomic_read(&vfdev->state) == __NDEV_READY; } -static inline void nitrox_debugfs_exit(struct nitrox_device *ndev) -{ } -#endif - #endif /* __NITROX_DEV_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.c b/drivers/crypto/cavium/nitrox/nitrox_hal.c index a9b82387cf53..c08d9f33a3b1 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_hal.c +++ b/drivers/crypto/cavium/nitrox/nitrox_hal.c @@ -5,10 +5,11 @@ #include "nitrox_csr.h" #define PLL_REF_CLK 50 +#define MAX_CSR_RETRIES 10 /** * emu_enable_cores - Enable EMU cluster cores. - * @ndev: N5 device + * @ndev: NITROX device */ static void emu_enable_cores(struct nitrox_device *ndev) { @@ -33,7 +34,7 @@ static void emu_enable_cores(struct nitrox_device *ndev) /** * nitrox_config_emu_unit - configure EMU unit. - * @ndev: N5 device + * @ndev: NITROX device */ void nitrox_config_emu_unit(struct nitrox_device *ndev) { @@ -63,29 +64,26 @@ void nitrox_config_emu_unit(struct nitrox_device *ndev) static void reset_pkt_input_ring(struct nitrox_device *ndev, int ring) { union nps_pkt_in_instr_ctl pkt_in_ctl; - union nps_pkt_in_instr_baoff_dbell pkt_in_dbell; union nps_pkt_in_done_cnts pkt_in_cnts; + int max_retries = MAX_CSR_RETRIES; u64 offset; + /* step 1: disable the ring, clear enable bit */ offset = NPS_PKT_IN_INSTR_CTLX(ring); - /* disable the ring */ pkt_in_ctl.value = nitrox_read_csr(ndev, offset); pkt_in_ctl.s.enb = 0; nitrox_write_csr(ndev, offset, pkt_in_ctl.value); - usleep_range(100, 150); - /* wait to clear [ENB] */ + /* step 2: wait to clear [ENB] */ + usleep_range(100, 150); do { pkt_in_ctl.value = nitrox_read_csr(ndev, offset); - } while (pkt_in_ctl.s.enb); + if (!pkt_in_ctl.s.enb) + break; + udelay(50); + } while (max_retries--); - /* clear off door bell counts */ - offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(ring); - pkt_in_dbell.value = 0; - pkt_in_dbell.s.dbell = 0xffffffff; - nitrox_write_csr(ndev, offset, pkt_in_dbell.value); - - /* clear done counts */ + /* step 3: clear done counts */ offset = 
NPS_PKT_IN_DONE_CNTSX(ring); pkt_in_cnts.value = nitrox_read_csr(ndev, offset); nitrox_write_csr(ndev, offset, pkt_in_cnts.value); @@ -95,6 +93,7 @@ static void reset_pkt_input_ring(struct nitrox_device *ndev, int ring) void enable_pkt_input_ring(struct nitrox_device *ndev, int ring) { union nps_pkt_in_instr_ctl pkt_in_ctl; + int max_retries = MAX_CSR_RETRIES; u64 offset; /* 64-byte instruction size */ @@ -107,12 +106,15 @@ void enable_pkt_input_ring(struct nitrox_device *ndev, int ring) /* wait for set [ENB] */ do { pkt_in_ctl.value = nitrox_read_csr(ndev, offset); - } while (!pkt_in_ctl.s.enb); + if (pkt_in_ctl.s.enb) + break; + udelay(50); + } while (max_retries--); } /** * nitrox_config_pkt_input_rings - configure Packet Input Rings - * @ndev: N5 device + * @ndev: NITROX device */ void nitrox_config_pkt_input_rings(struct nitrox_device *ndev) { @@ -121,11 +123,14 @@ void nitrox_config_pkt_input_rings(struct nitrox_device *ndev) for (i = 0; i < ndev->nr_queues; i++) { struct nitrox_cmdq *cmdq = &ndev->pkt_inq[i]; union nps_pkt_in_instr_rsize pkt_in_rsize; + union nps_pkt_in_instr_baoff_dbell pkt_in_dbell; u64 offset; reset_pkt_input_ring(ndev, i); - /* configure ring base address 16-byte aligned, + /** + * step 4: + * configure ring base address 16-byte aligned, * size and interrupt threshold. 
*/ offset = NPS_PKT_IN_INSTR_BADDRX(i); @@ -141,6 +146,13 @@ void nitrox_config_pkt_input_rings(struct nitrox_device *ndev) offset = NPS_PKT_IN_INT_LEVELSX(i); nitrox_write_csr(ndev, offset, 0xffffffff); + /* step 5: clear off door bell counts */ + offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(i); + pkt_in_dbell.value = 0; + pkt_in_dbell.s.dbell = 0xffffffff; + nitrox_write_csr(ndev, offset, pkt_in_dbell.value); + + /* enable the ring */ enable_pkt_input_ring(ndev, i); } } @@ -149,21 +161,26 @@ static void reset_pkt_solicit_port(struct nitrox_device *ndev, int port) { union nps_pkt_slc_ctl pkt_slc_ctl; union nps_pkt_slc_cnts pkt_slc_cnts; + int max_retries = MAX_CSR_RETRIES; u64 offset; - /* disable slc port */ + /* step 1: disable slc port */ offset = NPS_PKT_SLC_CTLX(port); pkt_slc_ctl.value = nitrox_read_csr(ndev, offset); pkt_slc_ctl.s.enb = 0; nitrox_write_csr(ndev, offset, pkt_slc_ctl.value); - usleep_range(100, 150); + /* step 2 */ + usleep_range(100, 150); /* wait to clear [ENB] */ do { pkt_slc_ctl.value = nitrox_read_csr(ndev, offset); - } while (pkt_slc_ctl.s.enb); + if (!pkt_slc_ctl.s.enb) + break; + udelay(50); + } while (max_retries--); - /* clear slc counters */ + /* step 3: clear slc counters */ offset = NPS_PKT_SLC_CNTSX(port); pkt_slc_cnts.value = nitrox_read_csr(ndev, offset); nitrox_write_csr(ndev, offset, pkt_slc_cnts.value); @@ -173,12 +190,12 @@ static void reset_pkt_solicit_port(struct nitrox_device *ndev, int port) void enable_pkt_solicit_port(struct nitrox_device *ndev, int port) { union nps_pkt_slc_ctl pkt_slc_ctl; + int max_retries = MAX_CSR_RETRIES; u64 offset; offset = NPS_PKT_SLC_CTLX(port); pkt_slc_ctl.value = 0; pkt_slc_ctl.s.enb = 1; - /* * 8 trailing 0x00 bytes will be added * to the end of the outgoing packet. 
@@ -191,23 +208,27 @@ void enable_pkt_solicit_port(struct nitrox_device *ndev, int port) /* wait to set [ENB] */ do { pkt_slc_ctl.value = nitrox_read_csr(ndev, offset); - } while (!pkt_slc_ctl.s.enb); + if (pkt_slc_ctl.s.enb) + break; + udelay(50); + } while (max_retries--); } -static void config_single_pkt_solicit_port(struct nitrox_device *ndev, - int port) +static void config_pkt_solicit_port(struct nitrox_device *ndev, int port) { union nps_pkt_slc_int_levels pkt_slc_int; u64 offset; reset_pkt_solicit_port(ndev, port); + /* step 4: configure interrupt levels */ offset = NPS_PKT_SLC_INT_LEVELSX(port); pkt_slc_int.value = 0; /* time interrupt threshold */ pkt_slc_int.s.timet = 0x3fffff; nitrox_write_csr(ndev, offset, pkt_slc_int.value); + /* enable the solicit port */ enable_pkt_solicit_port(ndev, port); } @@ -216,12 +237,12 @@ void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev) int i; for (i = 0; i < ndev->nr_queues; i++) - config_single_pkt_solicit_port(ndev, i); + config_pkt_solicit_port(ndev, i); } /** * enable_nps_interrupts - enable NPS interrutps - * @ndev: N5 device. + * @ndev: NITROX device. * * This includes NPS core, packet in and slc interrupts. 
*/ @@ -284,8 +305,8 @@ void nitrox_config_pom_unit(struct nitrox_device *ndev) } /** - * nitrox_config_rand_unit - enable N5 random number unit - * @ndev: N5 device + * nitrox_config_rand_unit - enable NITROX random number unit + * @ndev: NITROX device */ void nitrox_config_rand_unit(struct nitrox_device *ndev) { @@ -361,6 +382,7 @@ void invalidate_lbc(struct nitrox_device *ndev) { union lbc_inval_ctl lbc_ctl; union lbc_inval_status lbc_stat; + int max_retries = MAX_CSR_RETRIES; u64 offset; /* invalidate LBC */ @@ -370,10 +392,12 @@ void invalidate_lbc(struct nitrox_device *ndev) nitrox_write_csr(ndev, offset, lbc_ctl.value); offset = LBC_INVAL_STATUS; - do { lbc_stat.value = nitrox_read_csr(ndev, offset); - } while (!lbc_stat.s.done); + if (lbc_stat.s.done) + break; + udelay(50); + } while (max_retries--); } void nitrox_config_lbc_unit(struct nitrox_device *ndev) @@ -467,3 +491,31 @@ void nitrox_get_hwinfo(struct nitrox_device *ndev) /* copy partname */ strncpy(ndev->hw.partname, name, sizeof(ndev->hw.partname)); } + +void enable_pf2vf_mbox_interrupts(struct nitrox_device *ndev) +{ + u64 value = ~0ULL; + u64 reg_addr; + + /* Mailbox interrupt low enable set register */ + reg_addr = NPS_PKT_MBOX_INT_LO_ENA_W1S; + nitrox_write_csr(ndev, reg_addr, value); + + /* Mailbox interrupt high enable set register */ + reg_addr = NPS_PKT_MBOX_INT_HI_ENA_W1S; + nitrox_write_csr(ndev, reg_addr, value); +} + +void disable_pf2vf_mbox_interrupts(struct nitrox_device *ndev) +{ + u64 value = ~0ULL; + u64 reg_addr; + + /* Mailbox interrupt low enable clear register */ + reg_addr = NPS_PKT_MBOX_INT_LO_ENA_W1C; + nitrox_write_csr(ndev, reg_addr, value); + + /* Mailbox interrupt high enable clear register */ + reg_addr = NPS_PKT_MBOX_INT_HI_ENA_W1C; + nitrox_write_csr(ndev, reg_addr, value); +} diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.h b/drivers/crypto/cavium/nitrox/nitrox_hal.h index 489ee64c119e..d6606418ba38 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_hal.h +++ 
b/drivers/crypto/cavium/nitrox/nitrox_hal.h @@ -19,5 +19,7 @@ void enable_pkt_input_ring(struct nitrox_device *ndev, int ring); void enable_pkt_solicit_port(struct nitrox_device *ndev, int port); void config_nps_core_vfcfg_mode(struct nitrox_device *ndev, enum vf_mode mode); void nitrox_get_hwinfo(struct nitrox_device *ndev); +void enable_pf2vf_mbox_interrupts(struct nitrox_device *ndev); +void disable_pf2vf_mbox_interrupts(struct nitrox_device *ndev); #endif /* __NITROX_HAL_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c index 88a77b8fb3fb..3dec570a190a 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_isr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c @@ -7,12 +7,14 @@ #include "nitrox_csr.h" #include "nitrox_common.h" #include "nitrox_hal.h" +#include "nitrox_mbx.h" /** * One vector for each type of ring * - NPS packet ring, AQMQ ring and ZQMQ ring */ #define NR_RING_VECTORS 3 +#define NR_NON_RING_VECTORS 1 /* base entry for packet ring/port */ #define PKT_RING_MSIX_BASE 0 #define NON_RING_MSIX_BASE 192 @@ -219,7 +221,8 @@ static void nps_core_int_tasklet(unsigned long data) */ static irqreturn_t nps_core_int_isr(int irq, void *data) { - struct nitrox_device *ndev = data; + struct nitrox_q_vector *qvec = data; + struct nitrox_device *ndev = qvec->ndev; union nps_core_int_active core_int; core_int.value = nitrox_read_csr(ndev, NPS_CORE_INT_ACTIVE); @@ -245,6 +248,10 @@ static irqreturn_t nps_core_int_isr(int irq, void *data) if (core_int.s.bmi) clear_bmi_err_intr(ndev); + /* Mailbox interrupt */ + if (core_int.s.mbox) + nitrox_pf2vf_mbox_handler(ndev); + /* If more work callback the ISR, set resend */ core_int.s.resend = 1; nitrox_write_csr(ndev, NPS_CORE_INT_ACTIVE, core_int.value); @@ -275,6 +282,7 @@ void nitrox_unregister_interrupts(struct nitrox_device *ndev) qvec->valid = false; } kfree(ndev->qvec); + ndev->qvec = NULL; pci_free_irq_vectors(pdev); } @@ -321,6 +329,7 @@ int 
nitrox_register_interrupts(struct nitrox_device *ndev) if (qvec->ring >= ndev->nr_queues) break; + qvec->cmdq = &ndev->pkt_inq[qvec->ring]; snprintf(qvec->name, IRQ_NAMESZ, "nitrox-pkt%d", qvec->ring); /* get the vector number */ vec = pci_irq_vector(pdev, i); @@ -335,13 +344,13 @@ int nitrox_register_interrupts(struct nitrox_device *ndev) tasklet_init(&qvec->resp_tasklet, pkt_slc_resp_tasklet, (unsigned long)qvec); - qvec->cmdq = &ndev->pkt_inq[qvec->ring]; qvec->valid = true; } /* request irqs for non ring vectors */ i = NON_RING_MSIX_BASE; qvec = &ndev->qvec[i]; + qvec->ndev = ndev; snprintf(qvec->name, IRQ_NAMESZ, "nitrox-core-int%d", i); /* get the vector number */ @@ -356,7 +365,6 @@ int nitrox_register_interrupts(struct nitrox_device *ndev) tasklet_init(&qvec->resp_tasklet, nps_core_int_tasklet, (unsigned long)qvec); - qvec->ndev = ndev; qvec->valid = true; return 0; @@ -365,3 +373,81 @@ irq_fail: nitrox_unregister_interrupts(ndev); return ret; } + +void nitrox_sriov_unregister_interrupts(struct nitrox_device *ndev) +{ + struct pci_dev *pdev = ndev->pdev; + int i; + + for (i = 0; i < ndev->num_vecs; i++) { + struct nitrox_q_vector *qvec; + int vec; + + qvec = ndev->qvec + i; + if (!qvec->valid) + continue; + + vec = ndev->iov.msix.vector; + irq_set_affinity_hint(vec, NULL); + free_irq(vec, qvec); + + tasklet_disable(&qvec->resp_tasklet); + tasklet_kill(&qvec->resp_tasklet); + qvec->valid = false; + } + kfree(ndev->qvec); + ndev->qvec = NULL; + pci_disable_msix(pdev); +} + +int nitrox_sriov_register_interupts(struct nitrox_device *ndev) +{ + struct pci_dev *pdev = ndev->pdev; + struct nitrox_q_vector *qvec; + int vec, cpu; + int ret; + + /** + * only non ring vectors i.e Entry 192 is available + * for PF in SR-IOV mode. 
+ */ + ndev->iov.msix.entry = NON_RING_MSIX_BASE; + ret = pci_enable_msix_exact(pdev, &ndev->iov.msix, NR_NON_RING_VECTORS); + if (ret) { + dev_err(DEV(ndev), "failed to allocate nps-core-int%d\n", + NON_RING_MSIX_BASE); + return ret; + } + + qvec = kcalloc(NR_NON_RING_VECTORS, sizeof(*qvec), GFP_KERNEL); + if (!qvec) { + pci_disable_msix(pdev); + return -ENOMEM; + } + qvec->ndev = ndev; + + ndev->qvec = qvec; + ndev->num_vecs = NR_NON_RING_VECTORS; + snprintf(qvec->name, IRQ_NAMESZ, "nitrox-core-int%d", + NON_RING_MSIX_BASE); + + vec = ndev->iov.msix.vector; + ret = request_irq(vec, nps_core_int_isr, 0, qvec->name, qvec); + if (ret) { + dev_err(DEV(ndev), "irq failed for nitrox-core-int%d\n", + NON_RING_MSIX_BASE); + goto iov_irq_fail; + } + cpu = num_online_cpus(); + irq_set_affinity_hint(vec, get_cpu_mask(cpu)); + + tasklet_init(&qvec->resp_tasklet, nps_core_int_tasklet, + (unsigned long)qvec); + qvec->valid = true; + + return 0; + +iov_irq_fail: + nitrox_sriov_unregister_interrupts(ndev); + return ret; +} diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.h b/drivers/crypto/cavium/nitrox/nitrox_isr.h index 63418a6cc52c..1062c9336c1f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_isr.h +++ b/drivers/crypto/cavium/nitrox/nitrox_isr.h @@ -6,5 +6,7 @@ int nitrox_register_interrupts(struct nitrox_device *ndev); void nitrox_unregister_interrupts(struct nitrox_device *ndev); +int nitrox_sriov_register_interupts(struct nitrox_device *ndev); +void nitrox_sriov_unregister_interrupts(struct nitrox_device *ndev); #endif /* __NITROX_ISR_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c index 2260efa42308..9138bae12521 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_lib.c +++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c @@ -158,20 +158,31 @@ static void destroy_crypto_dma_pool(struct nitrox_device *ndev) void *crypto_alloc_context(struct nitrox_device *ndev) { struct ctx_hdr *ctx; + struct crypto_ctx_hdr *chdr; void 
*vaddr; dma_addr_t dma; - vaddr = dma_pool_zalloc(ndev->ctx_pool, GFP_KERNEL, &dma); - if (!vaddr) + chdr = kmalloc(sizeof(*chdr), GFP_KERNEL); + if (!chdr) return NULL; + vaddr = dma_pool_zalloc(ndev->ctx_pool, GFP_KERNEL, &dma); + if (!vaddr) { + kfree(chdr); + return NULL; + } + /* fill meta data */ ctx = vaddr; ctx->pool = ndev->ctx_pool; ctx->dma = dma; ctx->ctx_dma = dma + sizeof(struct ctx_hdr); - return ((u8 *)vaddr + sizeof(struct ctx_hdr)); + chdr->pool = ndev->ctx_pool; + chdr->dma = dma; + chdr->vaddr = vaddr; + + return chdr; } /** @@ -180,13 +191,14 @@ void *crypto_alloc_context(struct nitrox_device *ndev) */ void crypto_free_context(void *ctx) { - struct ctx_hdr *ctxp; + struct crypto_ctx_hdr *ctxp; if (!ctx) return; - ctxp = (struct ctx_hdr *)((u8 *)ctx - sizeof(struct ctx_hdr)); - dma_pool_free(ctxp->pool, ctxp, ctxp->dma); + ctxp = ctx; + dma_pool_free(ctxp->pool, ctxp->vaddr, ctxp->dma); + kfree(ctxp); } /** diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index 6595c95af9f1..014e9863c20e 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -13,9 +12,9 @@ #include "nitrox_csr.h" #include "nitrox_hal.h" #include "nitrox_isr.h" +#include "nitrox_debugfs.h" #define CNN55XX_DEV_ID 0x12 -#define MAX_PF_QUEUES 64 #define UCODE_HLEN 48 #define SE_GROUP 0 diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c new file mode 100644 index 000000000000..02ee95064841 --- /dev/null +++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include "nitrox_csr.h" +#include "nitrox_hal.h" +#include "nitrox_dev.h" + +#define RING_TO_VFNO(_x, _y) ((_x) / (_y)) + +/** + * mbx_msg_type - Mailbox message types + */ +enum mbx_msg_type { + MBX_MSG_TYPE_NOP, + MBX_MSG_TYPE_REQ, + 
MBX_MSG_TYPE_ACK, + MBX_MSG_TYPE_NACK, +}; + +/** + * mbx_msg_opcode - Mailbox message opcodes + */ +enum mbx_msg_opcode { + MSG_OP_VF_MODE = 1, + MSG_OP_VF_UP, + MSG_OP_VF_DOWN, + MSG_OP_CHIPID_VFID, +}; + +struct pf2vf_work { + struct nitrox_vfdev *vfdev; + struct nitrox_device *ndev; + struct work_struct pf2vf_resp; +}; + +static inline u64 pf2vf_read_mbox(struct nitrox_device *ndev, int ring) +{ + u64 reg_addr; + + reg_addr = NPS_PKT_MBOX_VF_PF_PFDATAX(ring); + return nitrox_read_csr(ndev, reg_addr); +} + +static inline void pf2vf_write_mbox(struct nitrox_device *ndev, u64 value, + int ring) +{ + u64 reg_addr; + + reg_addr = NPS_PKT_MBOX_PF_VF_PFDATAX(ring); + nitrox_write_csr(ndev, reg_addr, value); +} + +static void pf2vf_send_response(struct nitrox_device *ndev, + struct nitrox_vfdev *vfdev) +{ + union mbox_msg msg; + + msg.value = vfdev->msg.value; + + switch (vfdev->msg.opcode) { + case MSG_OP_VF_MODE: + msg.data = ndev->mode; + break; + case MSG_OP_VF_UP: + vfdev->nr_queues = vfdev->msg.data; + atomic_set(&vfdev->state, __NDEV_READY); + break; + case MSG_OP_CHIPID_VFID: + msg.id.chipid = ndev->idx; + msg.id.vfid = vfdev->vfno; + break; + case MSG_OP_VF_DOWN: + vfdev->nr_queues = 0; + atomic_set(&vfdev->state, __NDEV_NOT_READY); + break; + default: + msg.type = MBX_MSG_TYPE_NOP; + break; + } + + if (msg.type == MBX_MSG_TYPE_NOP) + return; + + /* send ACK to VF */ + msg.type = MBX_MSG_TYPE_ACK; + pf2vf_write_mbox(ndev, msg.value, vfdev->ring); + + vfdev->msg.value = 0; + atomic64_inc(&vfdev->mbx_resp); +} + +static void pf2vf_resp_handler(struct work_struct *work) +{ + struct pf2vf_work *pf2vf_resp = container_of(work, struct pf2vf_work, + pf2vf_resp); + struct nitrox_vfdev *vfdev = pf2vf_resp->vfdev; + struct nitrox_device *ndev = pf2vf_resp->ndev; + + switch (vfdev->msg.type) { + case MBX_MSG_TYPE_REQ: + /* process the request from VF */ + pf2vf_send_response(ndev, vfdev); + break; + case MBX_MSG_TYPE_ACK: + case MBX_MSG_TYPE_NACK: + break; + }; + + 
kfree(pf2vf_resp); +} + +void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev) +{ + struct nitrox_vfdev *vfdev; + struct pf2vf_work *pfwork; + u64 value, reg_addr; + u32 i; + int vfno; + + /* loop for VF(0..63) */ + reg_addr = NPS_PKT_MBOX_INT_LO; + value = nitrox_read_csr(ndev, reg_addr); + for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) { + /* get the vfno from ring */ + vfno = RING_TO_VFNO(i, ndev->iov.max_vf_queues); + vfdev = ndev->iov.vfdev + vfno; + vfdev->ring = i; + /* fill the vf mailbox data */ + vfdev->msg.value = pf2vf_read_mbox(ndev, vfdev->ring); + pfwork = kzalloc(sizeof(*pfwork), GFP_ATOMIC); + if (!pfwork) + continue; + + pfwork->vfdev = vfdev; + pfwork->ndev = ndev; + INIT_WORK(&pfwork->pf2vf_resp, pf2vf_resp_handler); + queue_work(ndev->iov.pf2vf_wq, &pfwork->pf2vf_resp); + /* clear the corresponding vf bit */ + nitrox_write_csr(ndev, reg_addr, BIT_ULL(i)); + } + + /* loop for VF(64..127) */ + reg_addr = NPS_PKT_MBOX_INT_HI; + value = nitrox_read_csr(ndev, reg_addr); + for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) { + /* get the vfno from ring */ + vfno = RING_TO_VFNO(i + 64, ndev->iov.max_vf_queues); + vfdev = ndev->iov.vfdev + vfno; + vfdev->ring = (i + 64); + /* fill the vf mailbox data */ + vfdev->msg.value = pf2vf_read_mbox(ndev, vfdev->ring); + + pfwork = kzalloc(sizeof(*pfwork), GFP_ATOMIC); + if (!pfwork) + continue; + + pfwork->vfdev = vfdev; + pfwork->ndev = ndev; + INIT_WORK(&pfwork->pf2vf_resp, pf2vf_resp_handler); + queue_work(ndev->iov.pf2vf_wq, &pfwork->pf2vf_resp); + /* clear the corresponding vf bit */ + nitrox_write_csr(ndev, reg_addr, BIT_ULL(i)); + } +} + +int nitrox_mbox_init(struct nitrox_device *ndev) +{ + struct nitrox_vfdev *vfdev; + int i; + + ndev->iov.vfdev = kcalloc(ndev->iov.num_vfs, + sizeof(struct nitrox_vfdev), GFP_KERNEL); + if (!ndev->iov.vfdev) + return -ENOMEM; + + for (i = 0; i < ndev->iov.num_vfs; i++) { + vfdev = ndev->iov.vfdev + i; + vfdev->vfno = i; + } + + /* 
allocate pf2vf response workqueue */ + ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0); + if (!ndev->iov.pf2vf_wq) { + kfree(ndev->iov.vfdev); + return -ENOMEM; + } + /* enable pf2vf mailbox interrupts */ + enable_pf2vf_mbox_interrupts(ndev); + + return 0; +} + +void nitrox_mbox_cleanup(struct nitrox_device *ndev) +{ + /* disable pf2vf mailbox interrupts */ + disable_pf2vf_mbox_interrupts(ndev); + /* destroy workqueue */ + if (ndev->iov.pf2vf_wq) + destroy_workqueue(ndev->iov.pf2vf_wq); + + kfree(ndev->iov.vfdev); + ndev->iov.pf2vf_wq = NULL; + ndev->iov.vfdev = NULL; +} diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.h b/drivers/crypto/cavium/nitrox/nitrox_mbx.h new file mode 100644 index 000000000000..5008399775a9 --- /dev/null +++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __NITROX_MBX_H +#define __NITROX_MBX_H + +int nitrox_mbox_init(struct nitrox_device *ndev); +void nitrox_mbox_cleanup(struct nitrox_device *ndev); +void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev); + +#endif /* __NITROX_MBX_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h index d091b6f5f5dd..76c0f0be7233 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_req.h +++ b/drivers/crypto/cavium/nitrox/nitrox_req.h @@ -7,6 +7,9 @@ #include "nitrox_dev.h" +#define PENDING_SIG 0xFFFFFFFFFFFFFFFFUL +#define PRIO 4001 + /** * struct gphdr - General purpose Header * @param0: first parameter. @@ -46,13 +49,6 @@ union se_req_ctrl { } s; }; -struct nitrox_sglist { - u16 len; - u16 raz0; - u32 raz1; - dma_addr_t dma; -}; - #define MAX_IV_LEN 16 /** @@ -62,8 +58,10 @@ struct nitrox_sglist { * @ctx_handle: Crypto context handle. * @gph: GP Header * @ctrl: Request Information. 
- * @in: Input sglist - * @out: Output sglist + * @orh: ORH address + * @comp: completion address + * @src: Input sglist + * @dst: Output sglist */ struct se_crypto_request { u8 opcode; @@ -73,9 +71,8 @@ struct se_crypto_request { struct gphdr gph; union se_req_ctrl ctrl; - - u8 iv[MAX_IV_LEN]; - u16 ivsize; + u64 *orh; + u64 *comp; struct scatterlist *src; struct scatterlist *dst; @@ -110,6 +107,18 @@ enum flexi_cipher { CIPHER_INVALID }; +enum flexi_auth { + AUTH_NULL = 0, + AUTH_MD5, + AUTH_SHA1, + AUTH_SHA2_SHA224, + AUTH_SHA2_SHA256, + AUTH_SHA2_SHA384, + AUTH_SHA2_SHA512, + AUTH_GMAC, + AUTH_INVALID +}; + /** * struct crypto_keys - Crypto keys * @key: Encryption key or KEY1 for AES-XTS @@ -136,6 +145,32 @@ struct auth_keys { u8 opad[64]; }; +union fc_ctx_flags { + __be64 f; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cipher_type : 4; + u64 reserved_59 : 1; + u64 aes_keylen : 2; + u64 iv_source : 1; + u64 hash_type : 4; + u64 reserved_49_51 : 3; + u64 auth_input_type: 1; + u64 mac_len : 8; + u64 reserved_0_39 : 40; +#else + u64 reserved_0_39 : 40; + u64 mac_len : 8; + u64 auth_input_type: 1; + u64 reserved_49_51 : 3; + u64 hash_type : 4; + u64 iv_source : 1; + u64 aes_keylen : 2; + u64 reserved_59 : 1; + u64 cipher_type : 4; +#endif + } w0; +}; /** * struct flexi_crypto_context - Crypto context * @cipher_type: Encryption cipher type @@ -150,49 +185,30 @@ struct auth_keys { * @auth: Authentication keys */ struct flexi_crypto_context { - union { - __be64 flags; - struct { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 cipher_type : 4; - u64 reserved_59 : 1; - u64 aes_keylen : 2; - u64 iv_source : 1; - u64 hash_type : 4; - u64 reserved_49_51 : 3; - u64 auth_input_type: 1; - u64 mac_len : 8; - u64 reserved_0_39 : 40; -#else - u64 reserved_0_39 : 40; - u64 mac_len : 8; - u64 auth_input_type: 1; - u64 reserved_49_51 : 3; - u64 hash_type : 4; - u64 iv_source : 1; - u64 aes_keylen : 2; - u64 reserved_59 : 1; - u64 cipher_type : 4; -#endif - } w0; - }; - + union 
fc_ctx_flags flags; struct crypto_keys crypto; struct auth_keys auth; }; +struct crypto_ctx_hdr { + struct dma_pool *pool; + dma_addr_t dma; + void *vaddr; +}; + struct nitrox_crypto_ctx { struct nitrox_device *ndev; union { u64 ctx_handle; struct flexi_crypto_context *fctx; } u; + struct crypto_ctx_hdr *chdr; }; struct nitrox_kcrypt_request { struct se_crypto_request creq; - struct nitrox_crypto_ctx *nctx; - struct skcipher_request *skreq; + u8 *src; + u8 *dst; }; /** @@ -369,26 +385,19 @@ struct nitrox_sgcomp { /* * strutct nitrox_sgtable - SG list information - * @map_cnt: Number of buffers mapped - * @nr_comp: Number of sglist components + * @sgmap_cnt: Number of buffers mapped * @total_bytes: Total bytes in sglist. - * @len: Total sglist components length. - * @dma: DMA address of sglist component. - * @dir: DMA direction. - * @buf: crypto request buffer. - * @sglist: SG list of input/output buffers. + * @sgcomp_len: Total sglist components length. + * @sgcomp_dma: DMA address of sglist component. + * @sg: crypto request buffer. * @sgcomp: sglist component for NITROX. */ struct nitrox_sgtable { - u8 map_bufs_cnt; - u8 nr_sgcomp; + u8 sgmap_cnt; u16 total_bytes; - u32 len; - dma_addr_t dma; - enum dma_data_direction dir; - - struct scatterlist *buf; - struct nitrox_sglist *sglist; + u32 sgcomp_len; + dma_addr_t sgcomp_dma; + struct scatterlist *sg; struct nitrox_sgcomp *sgcomp; }; @@ -398,13 +407,11 @@ struct nitrox_sgtable { #define COMP_HLEN 8 struct resp_hdr { - u64 orh; - dma_addr_t orh_dma; - u64 completion; - dma_addr_t completion_dma; + u64 *orh; + u64 *completion; }; -typedef void (*completion_t)(struct skcipher_request *skreq, int err); +typedef void (*completion_t)(void *arg, int err); /** * struct nitrox_softreq - Represents the NIROX Request. 
@@ -427,7 +434,6 @@ struct nitrox_softreq { u32 flags; gfp_t gfp; atomic_t status; - bool inplace; struct nitrox_device *ndev; struct nitrox_cmdq *cmdq; @@ -440,7 +446,201 @@ struct nitrox_softreq { unsigned long tstamp; completion_t callback; - struct skcipher_request *skreq; + void *cb_arg; }; +static inline int flexi_aes_keylen(int keylen) +{ + int aes_keylen; + + switch (keylen) { + case AES_KEYSIZE_128: + aes_keylen = 1; + break; + case AES_KEYSIZE_192: + aes_keylen = 2; + break; + case AES_KEYSIZE_256: + aes_keylen = 3; + break; + default: + aes_keylen = -EINVAL; + break; + } + return aes_keylen; +} + +static inline void *alloc_req_buf(int nents, int extralen, gfp_t gfp) +{ + size_t size; + + size = sizeof(struct scatterlist) * nents; + size += extralen; + + return kzalloc(size, gfp); +} + +/** + * create_single_sg - Point SG entry to the data + * @sg: Destination SG list + * @buf: Data + * @buflen: Data length + * + * Returns next free entry in the destination SG list + **/ +static inline struct scatterlist *create_single_sg(struct scatterlist *sg, + void *buf, int buflen) +{ + sg_set_buf(sg, buf, buflen); + sg++; + return sg; +} + +/** + * create_multi_sg - Create multiple sg entries with buflen data length from + * source sglist + * @to_sg: Destination SG list + * @from_sg: Source SG list + * @buflen: Data length + * + * Returns next free entry in the destination SG list + **/ +static inline struct scatterlist *create_multi_sg(struct scatterlist *to_sg, + struct scatterlist *from_sg, + int buflen) +{ + struct scatterlist *sg = to_sg; + unsigned int sglen; + + for (; buflen; buflen -= sglen) { + sglen = from_sg->length; + if (sglen > buflen) + sglen = buflen; + + sg_set_buf(sg, sg_virt(from_sg), sglen); + from_sg = sg_next(from_sg); + sg++; + } + + return sg; +} + +static inline void set_orh_value(u64 *orh) +{ + WRITE_ONCE(*orh, PENDING_SIG); +} + +static inline void set_comp_value(u64 *comp) +{ + WRITE_ONCE(*comp, PENDING_SIG); +} + +static inline int 
alloc_src_req_buf(struct nitrox_kcrypt_request *nkreq, + int nents, int ivsize) +{ + struct se_crypto_request *creq = &nkreq->creq; + + nkreq->src = alloc_req_buf(nents, ivsize, creq->gfp); + if (!nkreq->src) + return -ENOMEM; + + return 0; +} + +static inline void nitrox_creq_copy_iv(char *dst, char *src, int size) +{ + memcpy(dst, src, size); +} + +static inline struct scatterlist *nitrox_creq_src_sg(char *iv, int ivsize) +{ + return (struct scatterlist *)(iv + ivsize); +} + +static inline void nitrox_creq_set_src_sg(struct nitrox_kcrypt_request *nkreq, + int nents, int ivsize, + struct scatterlist *src, int buflen) +{ + char *iv = nkreq->src; + struct scatterlist *sg; + struct se_crypto_request *creq = &nkreq->creq; + + creq->src = nitrox_creq_src_sg(iv, ivsize); + sg = creq->src; + sg_init_table(sg, nents); + + /* Input format: + * +----+----------------+ + * | IV | SRC sg entries | + * +----+----------------+ + */ + + /* IV */ + sg = create_single_sg(sg, iv, ivsize); + /* SRC entries */ + create_multi_sg(sg, src, buflen); +} + +static inline int alloc_dst_req_buf(struct nitrox_kcrypt_request *nkreq, + int nents) +{ + int extralen = ORH_HLEN + COMP_HLEN; + struct se_crypto_request *creq = &nkreq->creq; + + nkreq->dst = alloc_req_buf(nents, extralen, creq->gfp); + if (!nkreq->dst) + return -ENOMEM; + + return 0; +} + +static inline void nitrox_creq_set_orh(struct nitrox_kcrypt_request *nkreq) +{ + struct se_crypto_request *creq = &nkreq->creq; + + creq->orh = (u64 *)(nkreq->dst); + set_orh_value(creq->orh); +} + +static inline void nitrox_creq_set_comp(struct nitrox_kcrypt_request *nkreq) +{ + struct se_crypto_request *creq = &nkreq->creq; + + creq->comp = (u64 *)(nkreq->dst + ORH_HLEN); + set_comp_value(creq->comp); +} + +static inline struct scatterlist *nitrox_creq_dst_sg(char *dst) +{ + return (struct scatterlist *)(dst + ORH_HLEN + COMP_HLEN); +} + +static inline void nitrox_creq_set_dst_sg(struct nitrox_kcrypt_request *nkreq, + int nents, int ivsize, + 
struct scatterlist *dst, int buflen) +{ + struct se_crypto_request *creq = &nkreq->creq; + struct scatterlist *sg; + char *iv = nkreq->src; + + creq->dst = nitrox_creq_dst_sg(nkreq->dst); + sg = creq->dst; + sg_init_table(sg, nents); + + /* Output format: + * +-----+----+----------------+-----------------+ + * | ORH | IV | DST sg entries | COMPLETION Bytes| + * +-----+----+----------------+-----------------+ + */ + + /* ORH */ + sg = create_single_sg(sg, creq->orh, ORH_HLEN); + /* IV */ + sg = create_single_sg(sg, iv, ivsize); + /* DST entries */ + sg = create_multi_sg(sg, dst, buflen); + /* COMPLETION Bytes */ + create_single_sg(sg, creq->comp, COMP_HLEN); +} + #endif /* __NITROX_REQ_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index 3987cd84c033..e34e4df8fd24 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -13,7 +13,6 @@ #define FDATA_SIZE 32 /* Base destination port for the solicited requests */ #define SOLICIT_BASE_DPORT 256 -#define PENDING_SIG 0xFFFFFFFFFFFFFFFFUL #define REQ_NOT_POSTED 1 #define REQ_BACKLOG 2 @@ -52,58 +51,26 @@ static inline int incr_index(int index, int count, int max) return index; } -/** - * dma_free_sglist - unmap and free the sg lists. 
- * @ndev: N5 device - * @sgtbl: SG table - */ static void softreq_unmap_sgbufs(struct nitrox_softreq *sr) { struct nitrox_device *ndev = sr->ndev; struct device *dev = DEV(ndev); - struct nitrox_sglist *sglist; - /* unmap in sgbuf */ - sglist = sr->in.sglist; - if (!sglist) - goto out_unmap; - /* unmap iv */ - dma_unmap_single(dev, sglist->dma, sglist->len, DMA_BIDIRECTIONAL); - /* unmpa src sglist */ - dma_unmap_sg(dev, sr->in.buf, (sr->in.map_bufs_cnt - 1), sr->in.dir); - /* unamp gather component */ - dma_unmap_single(dev, sr->in.dma, sr->in.len, DMA_TO_DEVICE); - kfree(sr->in.sglist); + dma_unmap_sg(dev, sr->in.sg, sr->in.sgmap_cnt, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, sr->in.sgcomp_dma, sr->in.sgcomp_len, + DMA_TO_DEVICE); kfree(sr->in.sgcomp); - sr->in.sglist = NULL; - sr->in.buf = NULL; - sr->in.map_bufs_cnt = 0; + sr->in.sg = NULL; + sr->in.sgmap_cnt = 0; -out_unmap: - /* unmap out sgbuf */ - sglist = sr->out.sglist; - if (!sglist) - return; - - /* unmap orh */ - dma_unmap_single(dev, sr->resp.orh_dma, ORH_HLEN, sr->out.dir); - - /* unmap dst sglist */ - if (!sr->inplace) { - dma_unmap_sg(dev, sr->out.buf, (sr->out.map_bufs_cnt - 3), - sr->out.dir); - } - /* unmap completion */ - dma_unmap_single(dev, sr->resp.completion_dma, COMP_HLEN, sr->out.dir); - - /* unmap scatter component */ - dma_unmap_single(dev, sr->out.dma, sr->out.len, DMA_TO_DEVICE); - kfree(sr->out.sglist); + dma_unmap_sg(dev, sr->out.sg, sr->out.sgmap_cnt, + DMA_BIDIRECTIONAL); + dma_unmap_single(dev, sr->out.sgcomp_dma, sr->out.sgcomp_len, + DMA_TO_DEVICE); kfree(sr->out.sgcomp); - sr->out.sglist = NULL; - sr->out.buf = NULL; - sr->out.map_bufs_cnt = 0; + sr->out.sg = NULL; + sr->out.sgmap_cnt = 0; } static void softreq_destroy(struct nitrox_softreq *sr) @@ -116,7 +83,7 @@ static void softreq_destroy(struct nitrox_softreq *sr) * create_sg_component - create SG componets for N5 device. 
* @sr: Request structure * @sgtbl: SG table - * @nr_comp: total number of components required + * @map_nents: number of dma mapped entries * * Component structure * @@ -140,7 +107,7 @@ static int create_sg_component(struct nitrox_softreq *sr, { struct nitrox_device *ndev = sr->ndev; struct nitrox_sgcomp *sgcomp; - struct nitrox_sglist *sglist; + struct scatterlist *sg; dma_addr_t dma; size_t sz_comp; int i, j, nr_sgcomp; @@ -154,17 +121,15 @@ static int create_sg_component(struct nitrox_softreq *sr, return -ENOMEM; sgtbl->sgcomp = sgcomp; - sgtbl->nr_sgcomp = nr_sgcomp; - sglist = sgtbl->sglist; + sg = sgtbl->sg; /* populate device sg component */ for (i = 0; i < nr_sgcomp; i++) { - for (j = 0; j < 4; j++) { - sgcomp->len[j] = cpu_to_be16(sglist->len); - sgcomp->dma[j] = cpu_to_be64(sglist->dma); - sglist++; + for (j = 0; j < 4 && sg; j++) { + sgcomp[i].len[j] = cpu_to_be16(sg_dma_len(sg)); + sgcomp[i].dma[j] = cpu_to_be64(sg_dma_address(sg)); + sg = sg_next(sg); } - sgcomp++; } /* map the device sg component */ dma = dma_map_single(DEV(ndev), sgtbl->sgcomp, sz_comp, DMA_TO_DEVICE); @@ -174,8 +139,8 @@ static int create_sg_component(struct nitrox_softreq *sr, return -ENOMEM; } - sgtbl->dma = dma; - sgtbl->len = sz_comp; + sgtbl->sgcomp_dma = dma; + sgtbl->sgcomp_len = sz_comp; return 0; } @@ -193,66 +158,27 @@ static int dma_map_inbufs(struct nitrox_softreq *sr, { struct device *dev = DEV(sr->ndev); struct scatterlist *sg = req->src; - struct nitrox_sglist *glist; int i, nents, ret = 0; - dma_addr_t dma; - size_t sz; - nents = sg_nents(req->src); + nents = dma_map_sg(dev, req->src, sg_nents(req->src), + DMA_BIDIRECTIONAL); + if (!nents) + return -EINVAL; - /* creater gather list IV and src entries */ - sz = roundup((1 + nents), 4) * sizeof(*glist); - glist = kzalloc(sz, sr->gfp); - if (!glist) - return -ENOMEM; + for_each_sg(req->src, sg, nents, i) + sr->in.total_bytes += sg_dma_len(sg); - sr->in.sglist = glist; - /* map IV */ - dma = dma_map_single(dev, &req->iv, 
req->ivsize, DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma)) { - ret = -EINVAL; - goto iv_map_err; - } - - sr->in.dir = (req->src == req->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - /* map src entries */ - nents = dma_map_sg(dev, req->src, nents, sr->in.dir); - if (!nents) { - ret = -EINVAL; - goto src_map_err; - } - sr->in.buf = req->src; - - /* store the mappings */ - glist->len = req->ivsize; - glist->dma = dma; - glist++; - sr->in.total_bytes += req->ivsize; - - for_each_sg(req->src, sg, nents, i) { - glist->len = sg_dma_len(sg); - glist->dma = sg_dma_address(sg); - sr->in.total_bytes += glist->len; - glist++; - } - /* roundup map count to align with entires in sg component */ - sr->in.map_bufs_cnt = (1 + nents); - - /* create NITROX gather component */ - ret = create_sg_component(sr, &sr->in, sr->in.map_bufs_cnt); + sr->in.sg = req->src; + sr->in.sgmap_cnt = nents; + ret = create_sg_component(sr, &sr->in, sr->in.sgmap_cnt); if (ret) goto incomp_err; return 0; incomp_err: - dma_unmap_sg(dev, req->src, nents, sr->in.dir); - sr->in.map_bufs_cnt = 0; -src_map_err: - dma_unmap_single(dev, dma, req->ivsize, DMA_BIDIRECTIONAL); -iv_map_err: - kfree(sr->in.sglist); - sr->in.sglist = NULL; + dma_unmap_sg(dev, req->src, nents, DMA_BIDIRECTIONAL); + sr->in.sgmap_cnt = 0; return ret; } @@ -260,104 +186,25 @@ static int dma_map_outbufs(struct nitrox_softreq *sr, struct se_crypto_request *req) { struct device *dev = DEV(sr->ndev); - struct nitrox_sglist *glist = sr->in.sglist; - struct nitrox_sglist *slist; - struct scatterlist *sg; - int i, nents, map_bufs_cnt, ret = 0; - size_t sz; + int nents, ret = 0; - nents = sg_nents(req->dst); + nents = dma_map_sg(dev, req->dst, sg_nents(req->dst), + DMA_BIDIRECTIONAL); + if (!nents) + return -EINVAL; - /* create scatter list ORH, IV, dst entries and Completion header */ - sz = roundup((3 + nents), 4) * sizeof(*slist); - slist = kzalloc(sz, sr->gfp); - if (!slist) - return -ENOMEM; - - sr->out.sglist = slist; - sr->out.dir = 
DMA_BIDIRECTIONAL; - /* map ORH */ - sr->resp.orh_dma = dma_map_single(dev, &sr->resp.orh, ORH_HLEN, - sr->out.dir); - if (dma_mapping_error(dev, sr->resp.orh_dma)) { - ret = -EINVAL; - goto orh_map_err; - } - - /* map completion */ - sr->resp.completion_dma = dma_map_single(dev, &sr->resp.completion, - COMP_HLEN, sr->out.dir); - if (dma_mapping_error(dev, sr->resp.completion_dma)) { - ret = -EINVAL; - goto compl_map_err; - } - - sr->inplace = (req->src == req->dst) ? true : false; - /* out place */ - if (!sr->inplace) { - nents = dma_map_sg(dev, req->dst, nents, sr->out.dir); - if (!nents) { - ret = -EINVAL; - goto dst_map_err; - } - } - sr->out.buf = req->dst; - - /* store the mappings */ - /* orh */ - slist->len = ORH_HLEN; - slist->dma = sr->resp.orh_dma; - slist++; - - /* copy the glist mappings */ - if (sr->inplace) { - nents = sr->in.map_bufs_cnt - 1; - map_bufs_cnt = sr->in.map_bufs_cnt; - while (map_bufs_cnt--) { - slist->len = glist->len; - slist->dma = glist->dma; - slist++; - glist++; - } - } else { - /* copy iv mapping */ - slist->len = glist->len; - slist->dma = glist->dma; - slist++; - /* copy remaining maps */ - for_each_sg(req->dst, sg, nents, i) { - slist->len = sg_dma_len(sg); - slist->dma = sg_dma_address(sg); - slist++; - } - } - - /* completion */ - slist->len = COMP_HLEN; - slist->dma = sr->resp.completion_dma; - - sr->out.map_bufs_cnt = (3 + nents); - - ret = create_sg_component(sr, &sr->out, sr->out.map_bufs_cnt); + sr->out.sg = req->dst; + sr->out.sgmap_cnt = nents; + ret = create_sg_component(sr, &sr->out, sr->out.sgmap_cnt); if (ret) goto outcomp_map_err; return 0; outcomp_map_err: - if (!sr->inplace) - dma_unmap_sg(dev, req->dst, nents, sr->out.dir); - sr->out.map_bufs_cnt = 0; - sr->out.buf = NULL; -dst_map_err: - dma_unmap_single(dev, sr->resp.completion_dma, COMP_HLEN, sr->out.dir); - sr->resp.completion_dma = 0; -compl_map_err: - dma_unmap_single(dev, sr->resp.orh_dma, ORH_HLEN, sr->out.dir); - sr->resp.orh_dma = 0; -orh_map_err: - 
kfree(sr->out.sglist); - sr->out.sglist = NULL; + dma_unmap_sg(dev, req->dst, nents, DMA_BIDIRECTIONAL); + sr->out.sgmap_cnt = 0; + sr->out.sg = NULL; return ret; } @@ -422,6 +269,8 @@ static inline bool cmdq_full(struct nitrox_cmdq *cmdq, int qlen) smp_mb__after_atomic(); return true; } + /* sync with other cpus */ + smp_mb__after_atomic(); return false; } @@ -477,8 +326,6 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) spin_lock_bh(&cmdq->backlog_qlock); list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) { - struct skcipher_request *skreq; - /* submit until space available */ if (unlikely(cmdq_full(cmdq, ndev->qlen))) { ret = -ENOSPC; @@ -490,12 +337,8 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) /* sync with other cpus */ smp_mb__after_atomic(); - skreq = sr->skreq; /* post the command */ post_se_instr(sr, cmdq); - - /* backlog requests are posted, wakeup with -EINPROGRESS */ - skcipher_request_complete(skreq, -EINPROGRESS); } spin_unlock_bh(&cmdq->backlog_qlock); @@ -518,7 +361,7 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) } /* add to backlog list */ backlog_list_add(sr, cmdq); - return -EBUSY; + return -EINPROGRESS; } post_se_instr(sr, cmdq); @@ -535,7 +378,7 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) int nitrox_process_se_request(struct nitrox_device *ndev, struct se_crypto_request *req, completion_t callback, - struct skcipher_request *skreq) + void *cb_arg) { struct nitrox_softreq *sr; dma_addr_t ctx_handle = 0; @@ -552,12 +395,12 @@ int nitrox_process_se_request(struct nitrox_device *ndev, sr->flags = req->flags; sr->gfp = req->gfp; sr->callback = callback; - sr->skreq = skreq; + sr->cb_arg = cb_arg; atomic_set(&sr->status, REQ_NOT_POSTED); - WRITE_ONCE(sr->resp.orh, PENDING_SIG); - WRITE_ONCE(sr->resp.completion, PENDING_SIG); + sr->resp.orh = req->orh; + sr->resp.completion = req->comp; ret = softreq_map_iobuf(sr, req); if (ret) { @@ -598,13 +441,13 @@ int 
nitrox_process_se_request(struct nitrox_device *ndev, /* fill the packet instruction */ /* word 0 */ - sr->instr.dptr0 = cpu_to_be64(sr->in.dma); + sr->instr.dptr0 = cpu_to_be64(sr->in.sgcomp_dma); /* word 1 */ sr->instr.ih.value = 0; sr->instr.ih.s.g = 1; - sr->instr.ih.s.gsz = sr->in.map_bufs_cnt; - sr->instr.ih.s.ssz = sr->out.map_bufs_cnt; + sr->instr.ih.s.gsz = sr->in.sgmap_cnt; + sr->instr.ih.s.ssz = sr->out.sgmap_cnt; sr->instr.ih.s.fsz = FDATA_SIZE + sizeof(struct gphdr); sr->instr.ih.s.tlen = sr->instr.ih.s.fsz + sr->in.total_bytes; sr->instr.ih.value = cpu_to_be64(sr->instr.ih.value); @@ -626,11 +469,11 @@ int nitrox_process_se_request(struct nitrox_device *ndev, /* word 4 */ sr->instr.slc.value[0] = 0; - sr->instr.slc.s.ssz = sr->out.map_bufs_cnt; + sr->instr.slc.s.ssz = sr->out.sgmap_cnt; sr->instr.slc.value[0] = cpu_to_be64(sr->instr.slc.value[0]); /* word 5 */ - sr->instr.slc.s.rptr = cpu_to_be64(sr->out.dma); + sr->instr.slc.s.rptr = cpu_to_be64(sr->out.sgcomp_dma); /* * No conversion for front data, @@ -664,6 +507,24 @@ void backlog_qflush_work(struct work_struct *work) post_backlog_cmds(cmdq); } +static bool sr_completed(struct nitrox_softreq *sr) +{ + u64 orh = READ_ONCE(*sr->resp.orh); + unsigned long timeout = jiffies + msecs_to_jiffies(1); + + if ((orh != PENDING_SIG) && (orh & 0xff)) + return true; + + while (READ_ONCE(*sr->resp.completion) == PENDING_SIG) { + if (time_after(jiffies, timeout)) { + pr_err("comp not done\n"); + return false; + } + } + + return true; +} + /** * process_request_list - process completed requests * @ndev: N5 device @@ -675,8 +536,6 @@ static void process_response_list(struct nitrox_cmdq *cmdq) { struct nitrox_device *ndev = cmdq->ndev; struct nitrox_softreq *sr; - struct skcipher_request *skreq; - completion_t callback; int req_completed = 0, err = 0, budget; /* check all pending requests */ @@ -691,13 +550,13 @@ static void process_response_list(struct nitrox_cmdq *cmdq) break; /* check orh and completion bytes 
updates */ - if (READ_ONCE(sr->resp.orh) == READ_ONCE(sr->resp.completion)) { + if (!sr_completed(sr)) { /* request not completed, check for timeout */ if (!cmd_timeout(sr->tstamp, ndev->timeout)) break; dev_err_ratelimited(DEV(ndev), "Request timeout, orh 0x%016llx\n", - READ_ONCE(sr->resp.orh)); + READ_ONCE(*sr->resp.orh)); } atomic_dec(&cmdq->pending_count); atomic64_inc(&ndev->stats.completed); @@ -706,15 +565,12 @@ static void process_response_list(struct nitrox_cmdq *cmdq) /* remove from response list */ response_list_del(sr, cmdq); - callback = sr->callback; - skreq = sr->skreq; - /* ORH error code */ - err = READ_ONCE(sr->resp.orh) & 0xff; + err = READ_ONCE(*sr->resp.orh) & 0xff; softreq_destroy(sr); - if (callback) - callback(skreq, err); + if (sr->callback) + sr->callback(sr->cb_arg, err); req_completed++; } diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c new file mode 100644 index 000000000000..d4935d6cefdd --- /dev/null +++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "nitrox_dev.h" +#include "nitrox_common.h" +#include "nitrox_req.h" + +struct nitrox_cipher { + const char *name; + enum flexi_cipher value; +}; + +/** + * supported cipher list + */ +static const struct nitrox_cipher flexi_cipher_table[] = { + { "null", CIPHER_NULL }, + { "cbc(des3_ede)", CIPHER_3DES_CBC }, + { "ecb(des3_ede)", CIPHER_3DES_ECB }, + { "cbc(aes)", CIPHER_AES_CBC }, + { "ecb(aes)", CIPHER_AES_ECB }, + { "cfb(aes)", CIPHER_AES_CFB }, + { "rfc3686(ctr(aes))", CIPHER_AES_CTR }, + { "xts(aes)", CIPHER_AES_XTS }, + { "cts(cbc(aes))", CIPHER_AES_CBC_CTS }, + { NULL, CIPHER_INVALID } +}; + +static enum flexi_cipher flexi_cipher_type(const char *name) +{ + const struct nitrox_cipher *cipher = flexi_cipher_table; + + while (cipher->name) { + if 
(!strcmp(cipher->name, name)) + break; + cipher++; + } + return cipher->value; +} + +static int nitrox_skcipher_init(struct crypto_skcipher *tfm) +{ + struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm); + struct crypto_ctx_hdr *chdr; + + /* get the first device */ + nctx->ndev = nitrox_get_first_device(); + if (!nctx->ndev) + return -ENODEV; + + /* allocate nitrox crypto context */ + chdr = crypto_alloc_context(nctx->ndev); + if (!chdr) { + nitrox_put_device(nctx->ndev); + return -ENOMEM; + } + nctx->chdr = chdr; + nctx->u.ctx_handle = (uintptr_t)((u8 *)chdr->vaddr + + sizeof(struct ctx_hdr)); + crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(tfm) + + sizeof(struct nitrox_kcrypt_request)); + return 0; +} + +static void nitrox_skcipher_exit(struct crypto_skcipher *tfm) +{ + struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm); + + /* free the nitrox crypto context */ + if (nctx->u.ctx_handle) { + struct flexi_crypto_context *fctx = nctx->u.fctx; + + memzero_explicit(&fctx->crypto, sizeof(struct crypto_keys)); + memzero_explicit(&fctx->auth, sizeof(struct auth_keys)); + crypto_free_context((void *)nctx->chdr); + } + nitrox_put_device(nctx->ndev); + + nctx->u.ctx_handle = 0; + nctx->ndev = NULL; +} + +static inline int nitrox_skcipher_setkey(struct crypto_skcipher *cipher, + int aes_keylen, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); + struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); + struct flexi_crypto_context *fctx; + union fc_ctx_flags *flags; + enum flexi_cipher cipher_type; + const char *name; + + name = crypto_tfm_alg_name(tfm); + cipher_type = flexi_cipher_type(name); + if (unlikely(cipher_type == CIPHER_INVALID)) { + pr_err("unsupported cipher: %s\n", name); + return -EINVAL; + } + + /* fill crypto context */ + fctx = nctx->u.fctx; + flags = &fctx->flags; + flags->f = 0; + flags->w0.cipher_type = cipher_type; + flags->w0.aes_keylen = aes_keylen; + flags->w0.iv_source = 
IV_FROM_DPTR; + flags->f = cpu_to_be64(*(u64 *)&flags->w0); + /* copy the key to context */ + memcpy(fctx->crypto.u.key, key, keylen); + + return 0; +} + +static int nitrox_aes_setkey(struct crypto_skcipher *cipher, const u8 *key, + unsigned int keylen) +{ + int aes_keylen; + + aes_keylen = flexi_aes_keylen(keylen); + if (aes_keylen < 0) { + crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen); +} + +static int alloc_src_sglist(struct skcipher_request *skreq, int ivsize) +{ + struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq); + int nents = sg_nents(skreq->src) + 1; + int ret; + + /* Allocate buffer to hold IV and input scatterlist array */ + ret = alloc_src_req_buf(nkreq, nents, ivsize); + if (ret) + return ret; + + nitrox_creq_copy_iv(nkreq->src, skreq->iv, ivsize); + nitrox_creq_set_src_sg(nkreq, nents, ivsize, skreq->src, + skreq->cryptlen); + + return 0; +} + +static int alloc_dst_sglist(struct skcipher_request *skreq, int ivsize) +{ + struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq); + int nents = sg_nents(skreq->dst) + 3; + int ret; + + /* Allocate buffer to hold ORH, COMPLETION and output scatterlist + * array + */ + ret = alloc_dst_req_buf(nkreq, nents); + if (ret) + return ret; + + nitrox_creq_set_orh(nkreq); + nitrox_creq_set_comp(nkreq); + nitrox_creq_set_dst_sg(nkreq, nents, ivsize, skreq->dst, + skreq->cryptlen); + + return 0; +} + +static void free_src_sglist(struct skcipher_request *skreq) +{ + struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq); + + kfree(nkreq->src); +} + +static void free_dst_sglist(struct skcipher_request *skreq) +{ + struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq); + + kfree(nkreq->dst); +} + +static void nitrox_skcipher_callback(void *arg, int err) +{ + struct skcipher_request *skreq = arg; + + free_src_sglist(skreq); + free_dst_sglist(skreq); + if (err) { + 
pr_err_ratelimited("request failed status 0x%0x\n", err); + err = -EINVAL; + } + + skcipher_request_complete(skreq, err); +} + +static int nitrox_skcipher_crypt(struct skcipher_request *skreq, bool enc) +{ + struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq); + struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher); + struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq); + int ivsize = crypto_skcipher_ivsize(cipher); + struct se_crypto_request *creq; + int ret; + + creq = &nkreq->creq; + creq->flags = skreq->base.flags; + creq->gfp = (skreq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + + /* fill the request */ + creq->ctrl.value = 0; + creq->opcode = FLEXI_CRYPTO_ENCRYPT_HMAC; + creq->ctrl.s.arg = (enc ? ENCRYPT : DECRYPT); + /* param0: length of the data to be encrypted */ + creq->gph.param0 = cpu_to_be16(skreq->cryptlen); + creq->gph.param1 = 0; + /* param2: encryption data offset */ + creq->gph.param2 = cpu_to_be16(ivsize); + creq->gph.param3 = 0; + + creq->ctx_handle = nctx->u.ctx_handle; + creq->ctrl.s.ctxl = sizeof(struct flexi_crypto_context); + + ret = alloc_src_sglist(skreq, ivsize); + if (ret) + return ret; + + ret = alloc_dst_sglist(skreq, ivsize); + if (ret) { + free_src_sglist(skreq); + return ret; + } + + /* send the crypto request */ + return nitrox_process_se_request(nctx->ndev, creq, + nitrox_skcipher_callback, skreq); +} + +static int nitrox_aes_encrypt(struct skcipher_request *skreq) +{ + return nitrox_skcipher_crypt(skreq, true); +} + +static int nitrox_aes_decrypt(struct skcipher_request *skreq) +{ + return nitrox_skcipher_crypt(skreq, false); +} + +static int nitrox_3des_setkey(struct crypto_skcipher *cipher, + const u8 *key, unsigned int keylen) +{ + if (keylen != DES3_EDE_KEY_SIZE) { + crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + return nitrox_skcipher_setkey(cipher, 0, key, keylen); +} + +static int nitrox_3des_encrypt(struct skcipher_request 
*skreq) +{ + return nitrox_skcipher_crypt(skreq, true); +} + +static int nitrox_3des_decrypt(struct skcipher_request *skreq) +{ + return nitrox_skcipher_crypt(skreq, false); +} + +static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher, + const u8 *key, unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); + struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); + struct flexi_crypto_context *fctx; + int aes_keylen, ret; + + ret = xts_check_key(tfm, key, keylen); + if (ret) + return ret; + + keylen /= 2; + + aes_keylen = flexi_aes_keylen(keylen); + if (aes_keylen < 0) { + crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + fctx = nctx->u.fctx; + /* copy KEY2 */ + memcpy(fctx->auth.u.key2, (key + keylen), keylen); + + return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen); +} + +static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher, + const u8 *key, unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); + struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); + struct flexi_crypto_context *fctx; + int aes_keylen; + + if (keylen < CTR_RFC3686_NONCE_SIZE) + return -EINVAL; + + fctx = nctx->u.fctx; + + memcpy(fctx->crypto.iv, key + (keylen - CTR_RFC3686_NONCE_SIZE), + CTR_RFC3686_NONCE_SIZE); + + keylen -= CTR_RFC3686_NONCE_SIZE; + + aes_keylen = flexi_aes_keylen(keylen); + if (aes_keylen < 0) { + crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen); +} + +static struct skcipher_alg nitrox_skciphers[] = { { + .base = { + .cra_name = "cbc(aes)", + .cra_driver_name = "n5_cbc(aes)", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = 
AES_BLOCK_SIZE, + .setkey = nitrox_aes_setkey, + .encrypt = nitrox_aes_encrypt, + .decrypt = nitrox_aes_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +}, { + .base = { + .cra_name = "ecb(aes)", + .cra_driver_name = "n5_ecb(aes)", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = nitrox_aes_setkey, + .encrypt = nitrox_aes_encrypt, + .decrypt = nitrox_aes_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +}, { + .base = { + .cra_name = "cfb(aes)", + .cra_driver_name = "n5_cfb(aes)", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = nitrox_aes_setkey, + .encrypt = nitrox_aes_encrypt, + .decrypt = nitrox_aes_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +}, { + .base = { + .cra_name = "xts(aes)", + .cra_driver_name = "n5_xts(aes)", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = nitrox_aes_xts_setkey, + .encrypt = nitrox_aes_encrypt, + .decrypt = nitrox_aes_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +}, { + .base = { + .cra_name = "rfc3686(ctr(aes))", + .cra_driver_name = "n5_rfc3686(ctr(aes))", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = 
sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, + .ivsize = CTR_RFC3686_IV_SIZE, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, + .setkey = nitrox_aes_ctr_rfc3686_setkey, + .encrypt = nitrox_aes_encrypt, + .decrypt = nitrox_aes_decrypt, +}, { + .base = { + .cra_name = "cts(cbc(aes))", + .cra_driver_name = "n5_cts(cbc(aes))", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = nitrox_aes_setkey, + .encrypt = nitrox_aes_encrypt, + .decrypt = nitrox_aes_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +}, { + .base = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "n5_cbc(des3_ede)", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = nitrox_3des_setkey, + .encrypt = nitrox_3des_encrypt, + .decrypt = nitrox_3des_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +}, { + .base = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "n5_ecb(des3_ede)", + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct nitrox_crypto_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = nitrox_3des_setkey, + 
.encrypt = nitrox_3des_encrypt, + .decrypt = nitrox_3des_decrypt, + .init = nitrox_skcipher_init, + .exit = nitrox_skcipher_exit, +} + +}; + +int nitrox_register_skciphers(void) +{ + return crypto_register_skciphers(nitrox_skciphers, + ARRAY_SIZE(nitrox_skciphers)); +} + +void nitrox_unregister_skciphers(void) +{ + crypto_unregister_skciphers(nitrox_skciphers, + ARRAY_SIZE(nitrox_skciphers)); +} diff --git a/drivers/crypto/cavium/nitrox/nitrox_sriov.c b/drivers/crypto/cavium/nitrox/nitrox_sriov.c index 30c0aa874583..bf439d8256ba 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_sriov.c +++ b/drivers/crypto/cavium/nitrox/nitrox_sriov.c @@ -6,7 +6,12 @@ #include "nitrox_hal.h" #include "nitrox_common.h" #include "nitrox_isr.h" +#include "nitrox_mbx.h" +/** + * num_vfs_valid - validate VF count + * @num_vfs: number of VF(s) + */ static inline bool num_vfs_valid(int num_vfs) { bool valid = false; @@ -48,7 +53,32 @@ static inline enum vf_mode num_vfs_to_mode(int num_vfs) return mode; } -static void pf_sriov_cleanup(struct nitrox_device *ndev) +static inline int vf_mode_to_nr_queues(enum vf_mode mode) +{ + int nr_queues = 0; + + switch (mode) { + case __NDEV_MODE_PF: + nr_queues = MAX_PF_QUEUES; + break; + case __NDEV_MODE_VF16: + nr_queues = 8; + break; + case __NDEV_MODE_VF32: + nr_queues = 4; + break; + case __NDEV_MODE_VF64: + nr_queues = 2; + break; + case __NDEV_MODE_VF128: + nr_queues = 1; + break; + } + + return nr_queues; +} + +static void nitrox_pf_cleanup(struct nitrox_device *ndev) { /* PF has no queues in SR-IOV mode */ atomic_set(&ndev->state, __NDEV_NOT_READY); @@ -60,7 +90,11 @@ static void pf_sriov_cleanup(struct nitrox_device *ndev) nitrox_common_sw_cleanup(ndev); } -static int pf_sriov_init(struct nitrox_device *ndev) +/** + * nitrox_pf_reinit - re-initialize PF resources once SR-IOV is disabled + * @ndev: NITROX device + */ +static int nitrox_pf_reinit(struct nitrox_device *ndev) { int err; @@ -86,6 +120,33 @@ static int pf_sriov_init(struct 
nitrox_device *ndev) return nitrox_crypto_register(); } +static void nitrox_sriov_cleanup(struct nitrox_device *ndev) +{ + /* unregister interrupts for PF in SR-IOV */ + nitrox_sriov_unregister_interrupts(ndev); + nitrox_mbox_cleanup(ndev); +} + +static int nitrox_sriov_init(struct nitrox_device *ndev) +{ + int ret; + + /* register interrupts for PF in SR-IOV */ + ret = nitrox_sriov_register_interupts(ndev); + if (ret) + return ret; + + ret = nitrox_mbox_init(ndev); + if (ret) + goto sriov_init_fail; + + return 0; + +sriov_init_fail: + nitrox_sriov_cleanup(ndev); + return ret; +} + static int nitrox_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct nitrox_device *ndev = pci_get_drvdata(pdev); @@ -106,17 +167,32 @@ static int nitrox_sriov_enable(struct pci_dev *pdev, int num_vfs) } dev_info(DEV(ndev), "Enabled VF(s) %d\n", num_vfs); - ndev->num_vfs = num_vfs; ndev->mode = num_vfs_to_mode(num_vfs); + ndev->iov.num_vfs = num_vfs; + ndev->iov.max_vf_queues = vf_mode_to_nr_queues(ndev->mode); /* set bit in flags */ set_bit(__NDEV_SRIOV_BIT, &ndev->flags); /* cleanup PF resources */ - pf_sriov_cleanup(ndev); + nitrox_pf_cleanup(ndev); + + /* PF SR-IOV mode initialization */ + err = nitrox_sriov_init(ndev); + if (err) + goto iov_fail; config_nps_core_vfcfg_mode(ndev, ndev->mode); - return num_vfs; + +iov_fail: + pci_disable_sriov(pdev); + /* clear bit in flags */ + clear_bit(__NDEV_SRIOV_BIT, &ndev->flags); + ndev->iov.num_vfs = 0; + ndev->mode = __NDEV_MODE_PF; + /* reset back to working mode in PF */ + nitrox_pf_reinit(ndev); + return err; } static int nitrox_sriov_disable(struct pci_dev *pdev) @@ -134,12 +210,16 @@ static int nitrox_sriov_disable(struct pci_dev *pdev) /* clear bit in flags */ clear_bit(__NDEV_SRIOV_BIT, &ndev->flags); - ndev->num_vfs = 0; + ndev->iov.num_vfs = 0; + ndev->iov.max_vf_queues = 0; ndev->mode = __NDEV_MODE_PF; + /* cleanup PF SR-IOV resources */ + nitrox_sriov_cleanup(ndev); + config_nps_core_vfcfg_mode(ndev, ndev->mode); - return 
pf_sriov_init(ndev); + return nitrox_pf_reinit(ndev); } int nitrox_sriov_configure(struct pci_dev *pdev, int num_vfs) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 3c6fe57f91f8..9108015e56cc 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -346,9 +346,7 @@ static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); - cipher_tfm = crypto_alloc_cipher("aes", 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + cipher_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(cipher_tfm)) { pr_warn("could not load aes cipher driver\n"); return PTR_ERR(cipher_tfm); diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 01b82b82f8b8..f2643cda45db 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -58,6 +58,7 @@ struct cc_aead_ctx { unsigned int enc_keylen; unsigned int auth_keylen; unsigned int authsize; /* Actual (reduced?) 
size of the MAC/ICv */ + unsigned int hash_len; enum drv_cipher_mode cipher_mode; enum cc_flow_mode flow_mode; enum drv_hash_mode auth_mode; @@ -122,6 +123,13 @@ static void cc_aead_exit(struct crypto_aead *tfm) } } +static unsigned int cc_get_aead_hash_len(struct crypto_aead *tfm) +{ + struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); + + return cc_get_default_hash_len(ctx->drvdata); +} + static int cc_aead_init(struct crypto_aead *tfm) { struct aead_alg *alg = crypto_aead_alg(tfm); @@ -196,6 +204,7 @@ static int cc_aead_init(struct crypto_aead *tfm) ctx->auth_state.hmac.ipad_opad = NULL; ctx->auth_state.hmac.padded_authkey = NULL; } + ctx->hash_len = cc_get_aead_hash_len(tfm); return 0; @@ -327,7 +336,7 @@ static int hmac_setkey(struct cc_hw_desc *desc, struct cc_aead_ctx *ctx) /* Load the hash current length*/ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hash_mode); - set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz); + set_din_const(&desc[idx], 0, ctx->hash_len); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); idx++; @@ -465,7 +474,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, /* Load the hash current length*/ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hashmode); - set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz); + set_din_const(&desc[idx], 0, ctx->hash_len); set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); @@ -1001,7 +1010,7 @@ static void cc_set_hmac_desc(struct aead_request *req, struct cc_hw_desc desc[], hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hash_mode); set_din_sram(&desc[idx], cc_digest_len_addr(ctx->drvdata, hash_mode), - ctx->drvdata->hash_len_sz); + ctx->hash_len); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); idx++; @@ -1098,7 +1107,7 @@ static void cc_proc_scheme_desc(struct aead_request *req, 
hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hash_mode); set_dout_sram(&desc[idx], aead_handle->sram_workspace_addr, - ctx->drvdata->hash_len_sz); + ctx->hash_len); set_flow_mode(&desc[idx], S_HASH_to_DOUT); set_setup_mode(&desc[idx], SETUP_WRITE_STATE1); set_cipher_do(&desc[idx], DO_PAD); @@ -1128,7 +1137,7 @@ static void cc_proc_scheme_desc(struct aead_request *req, hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hash_mode); set_din_sram(&desc[idx], cc_digest_len_addr(ctx->drvdata, hash_mode), - ctx->drvdata->hash_len_sz); + ctx->hash_len); set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); @@ -2358,6 +2367,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_SHA1, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(hmac(sha1),cbc(des3_ede))", @@ -2377,6 +2387,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_DES, .auth_mode = DRV_HASH_SHA1, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(hmac(sha256),cbc(aes))", @@ -2396,6 +2407,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_SHA256, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(hmac(sha256),cbc(des3_ede))", @@ -2415,6 +2427,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_DES, .auth_mode = DRV_HASH_SHA256, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(xcbc(aes),cbc(aes))", @@ -2434,6 +2447,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_XCBC_MAC, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(hmac(sha1),rfc3686(ctr(aes)))", @@ -2453,6 +2467,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_SHA1, .min_hw_rev = 
CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(hmac(sha256),rfc3686(ctr(aes)))", @@ -2472,6 +2487,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_SHA256, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "authenc(xcbc(aes),rfc3686(ctr(aes)))", @@ -2491,6 +2507,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_XCBC_MAC, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "ccm(aes)", @@ -2510,6 +2527,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_NULL, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "rfc4309(ccm(aes))", @@ -2529,6 +2547,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_NULL, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "gcm(aes)", @@ -2548,6 +2567,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_NULL, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "rfc4106(gcm(aes))", @@ -2567,6 +2587,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_NULL, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "rfc4543(gcm(aes))", @@ -2586,6 +2607,7 @@ static struct cc_alg_template aead_algs[] = { .flow_mode = S_DIN_to_AES, .auth_mode = DRV_HASH_NULL, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, }; @@ -2670,7 +2692,8 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) /* Linux crypto */ for (alg = 0; alg < ARRAY_SIZE(aead_algs); alg++) { - if (aead_algs[alg].min_hw_rev > drvdata->hw_rev) + if ((aead_algs[alg].min_hw_rev > drvdata->hw_rev) || + !(drvdata->std_bodies & aead_algs[alg].std_body)) continue; t_alg = cc_create_aead_alg(&aead_algs[alg], dev); diff --git a/drivers/crypto/ccree/cc_cipher.c 
b/drivers/crypto/ccree/cc_cipher.c index 7623b29911af..cc92b031fad1 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "cc_driver.h" @@ -83,6 +84,9 @@ static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size) if (size == DES3_EDE_KEY_SIZE || size == DES_KEY_SIZE) return 0; break; + case S_DIN_to_SM4: + if (size == SM4_KEY_SIZE) + return 0; default: break; } @@ -122,6 +126,17 @@ static int validate_data_size(struct cc_cipher_ctx *ctx_p, if (IS_ALIGNED(size, DES_BLOCK_SIZE)) return 0; break; + case S_DIN_to_SM4: + switch (ctx_p->cipher_mode) { + case DRV_CIPHER_CTR: + return 0; + case DRV_CIPHER_ECB: + case DRV_CIPHER_CBC: + if (IS_ALIGNED(size, SM4_BLOCK_SIZE)) + return 0; + default: + break; + } default: break; } @@ -522,6 +537,9 @@ static void cc_setup_cipher_data(struct crypto_tfm *tfm, case S_DIN_to_DES: flow_mode = DIN_DES_DOUT; break; + case S_DIN_to_SM4: + flow_mode = DIN_SM4_DOUT; + break; default: dev_err(dev, "invalid flow mode, flow_mode = %d\n", flow_mode); return; @@ -815,6 +833,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_XTS, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "xts512(paes)", @@ -832,6 +851,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 512, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "xts4096(paes)", @@ -849,6 +869,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 4096, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "essiv(paes)", @@ -865,6 +886,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_ESSIV, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "essiv512(paes)", @@ -882,6 +904,7 @@ static const 
struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 512, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "essiv4096(paes)", @@ -899,6 +922,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 4096, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "bitlocker(paes)", @@ -915,6 +939,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_BITLOCKER, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "bitlocker512(paes)", @@ -932,6 +957,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 512, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "bitlocker4096(paes)", @@ -949,6 +975,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 4096, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "ecb(paes)", @@ -965,6 +992,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_ECB, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "cbc(paes)", @@ -981,6 +1009,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CBC, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "ofb(paes)", @@ -997,6 +1026,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_OFB, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "cts(cbc(paes))", @@ -1013,6 +1043,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CBC_CTS, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "ctr(paes)", @@ -1029,6 +1060,7 @@ static const struct cc_alg_template skcipher_algs[] = { 
.cipher_mode = DRV_CIPHER_CTR, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "xts(aes)", @@ -1045,6 +1077,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_XTS, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "xts512(aes)", @@ -1062,6 +1095,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 512, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "xts4096(aes)", @@ -1079,6 +1113,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 4096, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "essiv(aes)", @@ -1095,6 +1130,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_ESSIV, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "essiv512(aes)", @@ -1112,6 +1148,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 512, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "essiv4096(aes)", @@ -1129,6 +1166,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 4096, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "bitlocker(aes)", @@ -1145,6 +1183,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_BITLOCKER, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "bitlocker512(aes)", @@ -1162,6 +1201,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 512, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "bitlocker4096(aes)", @@ -1179,6 +1219,7 @@ static const struct cc_alg_template skcipher_algs[] = { .flow_mode = S_DIN_to_AES, .data_unit = 4096, 
.min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "ecb(aes)", @@ -1195,6 +1236,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_ECB, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "cbc(aes)", @@ -1211,6 +1253,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CBC, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "ofb(aes)", @@ -1227,6 +1270,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_OFB, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "cts(cbc(aes))", @@ -1243,6 +1287,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CBC_CTS, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "ctr(aes)", @@ -1259,6 +1304,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CTR, .flow_mode = S_DIN_to_AES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "cbc(des3_ede)", @@ -1275,6 +1321,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CBC, .flow_mode = S_DIN_to_DES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "ecb(des3_ede)", @@ -1291,6 +1338,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_ECB, .flow_mode = S_DIN_to_DES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "cbc(des)", @@ -1307,6 +1355,7 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_CBC, .flow_mode = S_DIN_to_DES, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "ecb(des)", @@ -1323,6 +1372,58 @@ static const struct cc_alg_template skcipher_algs[] = { .cipher_mode = DRV_CIPHER_ECB, .flow_mode = S_DIN_to_DES, .min_hw_rev = 
CC_HW_REV_630, + .std_body = CC_STD_NIST, + }, + { + .name = "cbc(sm4)", + .driver_name = "cbc-sm4-ccree", + .blocksize = SM4_BLOCK_SIZE, + .template_skcipher = { + .setkey = cc_cipher_setkey, + .encrypt = cc_cipher_encrypt, + .decrypt = cc_cipher_decrypt, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + }, + .cipher_mode = DRV_CIPHER_CBC, + .flow_mode = S_DIN_to_SM4, + .min_hw_rev = CC_HW_REV_713, + .std_body = CC_STD_OSCCA, + }, + { + .name = "ecb(sm4)", + .driver_name = "ecb-sm4-ccree", + .blocksize = SM4_BLOCK_SIZE, + .template_skcipher = { + .setkey = cc_cipher_setkey, + .encrypt = cc_cipher_encrypt, + .decrypt = cc_cipher_decrypt, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = 0, + }, + .cipher_mode = DRV_CIPHER_ECB, + .flow_mode = S_DIN_to_SM4, + .min_hw_rev = CC_HW_REV_713, + .std_body = CC_STD_OSCCA, + }, + { + .name = "ctr(sm4)", + .driver_name = "ctr-sm4-ccree", + .blocksize = SM4_BLOCK_SIZE, + .template_skcipher = { + .setkey = cc_cipher_setkey, + .encrypt = cc_cipher_encrypt, + .decrypt = cc_cipher_decrypt, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + }, + .cipher_mode = DRV_CIPHER_CTR, + .flow_mode = S_DIN_to_SM4, + .min_hw_rev = CC_HW_REV_713, + .std_body = CC_STD_OSCCA, }, }; @@ -1398,7 +1499,8 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) dev_dbg(dev, "Number of algorithms = %zu\n", ARRAY_SIZE(skcipher_algs)); for (alg = 0; alg < ARRAY_SIZE(skcipher_algs); alg++) { - if (skcipher_algs[alg].min_hw_rev > drvdata->hw_rev) + if ((skcipher_algs[alg].min_hw_rev > drvdata->hw_rev) || + !(drvdata->std_bodies & skcipher_algs[alg].std_body)) continue; dev_dbg(dev, "creating %s\n", skcipher_algs[alg].driver_name); diff --git a/drivers/crypto/ccree/cc_crypto_ctx.h b/drivers/crypto/ccree/cc_crypto_ctx.h index e032544f4e31..c8dac273c563 100644 --- a/drivers/crypto/ccree/cc_crypto_ctx.h +++ b/drivers/crypto/ccree/cc_crypto_ctx.h @@ -115,7 
+115,8 @@ enum drv_hash_mode { DRV_HASH_CBC_MAC = 6, DRV_HASH_XCBC_MAC = 7, DRV_HASH_CMAC = 8, - DRV_HASH_MODE_NUM = 9, + DRV_HASH_SM3 = 9, + DRV_HASH_MODE_NUM = 10, DRV_HASH_RESERVE32B = S32_MAX }; @@ -127,6 +128,7 @@ enum drv_hash_hw_mode { DRV_HASH_HW_SHA512 = 4, DRV_HASH_HW_SHA384 = 12, DRV_HASH_HW_GHASH = 6, + DRV_HASH_HW_SM3 = 14, DRV_HASH_HW_RESERVE32B = S32_MAX }; diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 1ff229c2aeab..8ada308d72ee 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -39,23 +39,38 @@ struct cc_hw_data { char *name; enum cc_hw_rev rev; u32 sig; + int std_bodies; }; /* Hardware revisions defs. */ +/* The 703 is a OSCCA only variant of the 713 */ +static const struct cc_hw_data cc703_hw = { + .name = "703", .rev = CC_HW_REV_713, .std_bodies = CC_STD_OSCCA +}; + +static const struct cc_hw_data cc713_hw = { + .name = "713", .rev = CC_HW_REV_713, .std_bodies = CC_STD_ALL +}; + static const struct cc_hw_data cc712_hw = { - .name = "712", .rev = CC_HW_REV_712, .sig = 0xDCC71200U + .name = "712", .rev = CC_HW_REV_712, .sig = 0xDCC71200U, + .std_bodies = CC_STD_ALL }; static const struct cc_hw_data cc710_hw = { - .name = "710", .rev = CC_HW_REV_710, .sig = 0xDCC63200U + .name = "710", .rev = CC_HW_REV_710, .sig = 0xDCC63200U, + .std_bodies = CC_STD_ALL }; static const struct cc_hw_data cc630p_hw = { - .name = "630P", .rev = CC_HW_REV_630, .sig = 0xDCC63000U + .name = "630P", .rev = CC_HW_REV_630, .sig = 0xDCC63000U, + .std_bodies = CC_STD_ALL }; static const struct of_device_id arm_ccree_dev_of_match[] = { + { .compatible = "arm,cryptocell-703-ree", .data = &cc703_hw }, + { .compatible = "arm,cryptocell-713-ree", .data = &cc713_hw }, { .compatible = "arm,cryptocell-712-ree", .data = &cc712_hw }, { .compatible = "arm,cryptocell-710-ree", .data = &cc710_hw }, { .compatible = "arm,cryptocell-630p-ree", .data = &cc630p_hw }, @@ -204,14 +219,13 @@ static int 
init_cc_resources(struct platform_device *plat_dev) hw_rev = (struct cc_hw_data *)dev_id->data; new_drvdata->hw_rev_name = hw_rev->name; new_drvdata->hw_rev = hw_rev->rev; + new_drvdata->std_bodies = hw_rev->std_bodies; if (hw_rev->rev >= CC_HW_REV_712) { - new_drvdata->hash_len_sz = HASH_LEN_SIZE_712; new_drvdata->axim_mon_offset = CC_REG(AXIM_MON_COMP); new_drvdata->sig_offset = CC_REG(HOST_SIGNATURE_712); new_drvdata->ver_offset = CC_REG(HOST_VERSION_712); } else { - new_drvdata->hash_len_sz = HASH_LEN_SIZE_630; new_drvdata->axim_mon_offset = CC_REG(AXIM_MON_COMP8); new_drvdata->sig_offset = CC_REG(HOST_SIGNATURE_630); new_drvdata->ver_offset = CC_REG(HOST_VERSION_630); @@ -297,15 +311,17 @@ static int init_cc_resources(struct platform_device *plat_dev) return rc; } - /* Verify correct mapping */ - signature_val = cc_ioread(new_drvdata, new_drvdata->sig_offset); - if (signature_val != hw_rev->sig) { - dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n", - signature_val, hw_rev->sig); - rc = -EINVAL; - goto post_clk_err; + if (hw_rev->rev <= CC_HW_REV_712) { + /* Verify correct mapping */ + signature_val = cc_ioread(new_drvdata, new_drvdata->sig_offset); + if (signature_val != hw_rev->sig) { + dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n", + signature_val, hw_rev->sig); + rc = -EINVAL; + goto post_clk_err; + } + dev_dbg(dev, "CC SIGNATURE=0x%08X\n", signature_val); } - dev_dbg(dev, "CC SIGNATURE=0x%08X\n", signature_val); /* Display HW versions */ dev_info(dev, "ARM CryptoCell %s Driver: HW version 0x%08X, Driver version %s\n", @@ -461,6 +477,14 @@ int cc_clk_on(struct cc_drvdata *drvdata) return 0; } +unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) +{ + if (drvdata->hw_rev >= CC_HW_REV_712) + return HASH_LEN_SIZE_712; + else + return HASH_LEN_SIZE_630; +} + void cc_clk_off(struct cc_drvdata *drvdata) { struct clk *clk = drvdata->clk; diff --git a/drivers/crypto/ccree/cc_driver.h 
b/drivers/crypto/ccree/cc_driver.h index d608a4faf662..5be7fd431b05 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -36,12 +36,19 @@ extern bool cc_dump_desc; extern bool cc_dump_bytes; -#define DRV_MODULE_VERSION "4.0" +#define DRV_MODULE_VERSION "5.0" enum cc_hw_rev { CC_HW_REV_630 = 630, CC_HW_REV_710 = 710, - CC_HW_REV_712 = 712 + CC_HW_REV_712 = 712, + CC_HW_REV_713 = 713 +}; + +enum cc_std_body { + CC_STD_NIST = 0x1, + CC_STD_OSCCA = 0x2, + CC_STD_ALL = 0x3 }; #define CC_COHERENT_CACHE_PARAMS 0xEEE @@ -127,10 +134,10 @@ struct cc_drvdata { bool coherent; char *hw_rev_name; enum cc_hw_rev hw_rev; - u32 hash_len_sz; u32 axim_mon_offset; u32 sig_offset; u32 ver_offset; + int std_bodies; }; struct cc_crypto_alg { @@ -156,6 +163,7 @@ struct cc_alg_template { int flow_mode; /* Note: currently, refers to the cipher mode only. */ int auth_mode; u32 min_hw_rev; + enum cc_std_body std_body; unsigned int data_unit; struct cc_drvdata *drvdata; }; @@ -182,6 +190,7 @@ int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe); void fini_cc_regs(struct cc_drvdata *drvdata); int cc_clk_on(struct cc_drvdata *drvdata); void cc_clk_off(struct cc_drvdata *drvdata); +unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata); static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val) { diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index b9313306c36f..2c4ddc8fb76b 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "cc_driver.h" @@ -16,6 +17,7 @@ #define CC_MAX_HASH_SEQ_LEN 12 #define CC_MAX_OPAD_KEYS_SIZE CC_MAX_HASH_BLCK_SIZE +#define CC_SM3_HASH_LEN_SIZE 8 struct cc_hash_handle { cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/ @@ -43,6 +45,9 @@ static u64 sha384_init[] = { static u64 sha512_init[] = { SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4, SHA512_H3, SHA512_H2, 
SHA512_H1, SHA512_H0 }; +static const u32 sm3_init[] = { + SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE, + SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA }; static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size); @@ -82,6 +87,7 @@ struct cc_hash_ctx { int hash_mode; int hw_mode; int inter_digestsize; + unsigned int hash_len; struct completion setkey_comp; bool is_hmac; }; @@ -138,10 +144,10 @@ static void cc_init_req(struct device *dev, struct ahash_req_ctx *state, ctx->hash_mode == DRV_HASH_SHA384) memcpy(state->digest_bytes_len, digest_len_sha512_init, - ctx->drvdata->hash_len_sz); + ctx->hash_len); else memcpy(state->digest_bytes_len, digest_len_init, - ctx->drvdata->hash_len_sz); + ctx->hash_len); } if (ctx->hash_mode != DRV_HASH_NULL) { @@ -321,7 +327,7 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, /* Get final MAC result */ hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); @@ -367,7 +373,7 @@ static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req, set_cipher_mode(&desc[idx], ctx->hw_mode); set_din_sram(&desc[idx], cc_digest_len_addr(ctx->drvdata, ctx->hash_mode), - ctx->drvdata->hash_len_sz); + ctx->hash_len); set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); @@ -440,7 +446,7 @@ static int cc_hash_digest(struct ahash_request *req) * digest */ hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); if (is_hmac) { set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT); @@ -454,14 +460,14 @@ static int cc_hash_digest(struct ahash_request *req) /* Load the hash current length */ 
hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); if (is_hmac) { set_din_type(&desc[idx], DMA_DLLI, state->digest_bytes_len_dma_addr, - ctx->drvdata->hash_len_sz, NS_BIT); + ctx->hash_len, NS_BIT); } else { - set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz); + set_din_const(&desc[idx], 0, ctx->hash_len); if (nbytes) set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED); else @@ -478,7 +484,7 @@ static int cc_hash_digest(struct ahash_request *req) hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], ctx->hw_mode); set_dout_dlli(&desc[idx], state->digest_buff_dma_addr, - ctx->drvdata->hash_len_sz, NS_BIT, 0); + ctx->hash_len, NS_BIT, 0); set_flow_mode(&desc[idx], S_HASH_to_DOUT); set_setup_mode(&desc[idx], SETUP_WRITE_STATE1); set_cipher_do(&desc[idx], DO_PAD); @@ -504,7 +510,7 @@ static int cc_restore_hash(struct cc_hw_desc *desc, struct cc_hash_ctx *ctx, { /* Restore hash digest */ hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT); set_flow_mode(&desc[idx], S_DIN_to_HASH); @@ -513,10 +519,10 @@ static int cc_restore_hash(struct cc_hw_desc *desc, struct cc_hash_ctx *ctx, /* Restore hash current length */ hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED); set_din_type(&desc[idx], DMA_DLLI, state->digest_bytes_len_dma_addr, - ctx->drvdata->hash_len_sz, NS_BIT); + ctx->hash_len, NS_BIT); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); idx++; @@ -576,7 +582,7 @@ static int cc_hash_update(struct ahash_request *req) /* store the hash digest result in context */ hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], 
ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); set_dout_dlli(&desc[idx], state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT, 0); set_flow_mode(&desc[idx], S_HASH_to_DOUT); @@ -585,9 +591,9 @@ static int cc_hash_update(struct ahash_request *req) /* store current hash length in context */ hw_desc_init(&desc[idx]); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); set_dout_dlli(&desc[idx], state->digest_bytes_len_dma_addr, - ctx->drvdata->hash_len_sz, NS_BIT, 1); + ctx->hash_len, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); set_flow_mode(&desc[idx], S_HASH_to_DOUT); set_setup_mode(&desc[idx], SETUP_WRITE_STATE1); @@ -649,9 +655,9 @@ static int cc_do_finup(struct ahash_request *req, bool update) /* Pad the hash */ hw_desc_init(&desc[idx]); set_cipher_do(&desc[idx], DO_PAD); - set_cipher_mode(&desc[idx], ctx->hw_mode); + set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); set_dout_dlli(&desc[idx], state->digest_bytes_len_dma_addr, - ctx->drvdata->hash_len_sz, NS_BIT, 0); + ctx->hash_len, NS_BIT, 0); set_setup_mode(&desc[idx], SETUP_WRITE_STATE1); set_flow_mode(&desc[idx], S_HASH_to_DOUT); idx++; @@ -749,7 +755,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, /* Load the hash current length*/ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], ctx->hw_mode); - set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz); + set_din_const(&desc[idx], 0, ctx->hash_len); set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); @@ -831,7 +837,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, /* Load the hash current length*/ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], ctx->hw_mode); - set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz); + set_din_const(&desc[idx], 0, ctx->hash_len); set_flow_mode(&desc[idx], 
S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); idx++; @@ -1069,6 +1075,16 @@ fail: return -ENOMEM; } +static int cc_get_hash_len(struct crypto_tfm *tfm) +{ + struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->hash_mode == DRV_HASH_SM3) + return CC_SM3_HASH_LEN_SIZE; + else + return cc_get_default_hash_len(ctx->drvdata); +} + static int cc_cra_init(struct crypto_tfm *tfm) { struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1086,7 +1102,7 @@ static int cc_cra_init(struct crypto_tfm *tfm) ctx->hw_mode = cc_alg->hw_mode; ctx->inter_digestsize = cc_alg->inter_digestsize; ctx->drvdata = cc_alg->drvdata; - + ctx->hash_len = cc_get_hash_len(tfm); return cc_alloc_ctx(ctx); } @@ -1465,8 +1481,8 @@ static int cc_hash_export(struct ahash_request *req, void *out) memcpy(out, state->digest_buff, ctx->inter_digestsize); out += ctx->inter_digestsize; - memcpy(out, state->digest_bytes_len, ctx->drvdata->hash_len_sz); - out += ctx->drvdata->hash_len_sz; + memcpy(out, state->digest_bytes_len, ctx->hash_len); + out += ctx->hash_len; memcpy(out, &curr_buff_cnt, sizeof(u32)); out += sizeof(u32); @@ -1494,8 +1510,8 @@ static int cc_hash_import(struct ahash_request *req, const void *in) memcpy(state->digest_buff, in, ctx->inter_digestsize); in += ctx->inter_digestsize; - memcpy(state->digest_bytes_len, in, ctx->drvdata->hash_len_sz); - in += ctx->drvdata->hash_len_sz; + memcpy(state->digest_bytes_len, in, ctx->hash_len); + in += ctx->hash_len; /* Sanity check the data as much as possible */ memcpy(&tmp, in, sizeof(u32)); @@ -1515,6 +1531,7 @@ struct cc_hash_template { char mac_name[CRYPTO_MAX_ALG_NAME]; char mac_driver_name[CRYPTO_MAX_ALG_NAME]; unsigned int blocksize; + bool is_mac; bool synchronize; struct ahash_alg template_ahash; int hash_mode; @@ -1522,6 +1539,7 @@ struct cc_hash_template { int inter_digestsize; struct cc_drvdata *drvdata; u32 min_hw_rev; + enum cc_std_body std_body; }; #define CC_STATE_SIZE(_x) \ @@ -1536,6 +1554,7 @@ static struct 
cc_hash_template driver_hash[] = { .mac_name = "hmac(sha1)", .mac_driver_name = "hmac-sha1-ccree", .blocksize = SHA1_BLOCK_SIZE, + .is_mac = true, .synchronize = false, .template_ahash = { .init = cc_hash_init, @@ -1555,6 +1574,7 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_HASH_HW_SHA1, .inter_digestsize = SHA1_DIGEST_SIZE, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "sha256", @@ -1562,6 +1582,7 @@ static struct cc_hash_template driver_hash[] = { .mac_name = "hmac(sha256)", .mac_driver_name = "hmac-sha256-ccree", .blocksize = SHA256_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_hash_update, @@ -1580,6 +1601,7 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_HASH_HW_SHA256, .inter_digestsize = SHA256_DIGEST_SIZE, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "sha224", @@ -1587,6 +1609,7 @@ static struct cc_hash_template driver_hash[] = { .mac_name = "hmac(sha224)", .mac_driver_name = "hmac-sha224-ccree", .blocksize = SHA224_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_hash_update, @@ -1605,6 +1628,7 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_HASH_HW_SHA256, .inter_digestsize = SHA256_DIGEST_SIZE, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .name = "sha384", @@ -1612,6 +1636,7 @@ static struct cc_hash_template driver_hash[] = { .mac_name = "hmac(sha384)", .mac_driver_name = "hmac-sha384-ccree", .blocksize = SHA384_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_hash_update, @@ -1630,6 +1655,7 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_HASH_HW_SHA512, .inter_digestsize = SHA512_DIGEST_SIZE, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "sha512", @@ -1637,6 +1663,7 @@ static struct cc_hash_template driver_hash[] = { .mac_name = "hmac(sha512)", .mac_driver_name = 
"hmac-sha512-ccree", .blocksize = SHA512_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_hash_update, @@ -1655,6 +1682,7 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_HASH_HW_SHA512, .inter_digestsize = SHA512_DIGEST_SIZE, .min_hw_rev = CC_HW_REV_712, + .std_body = CC_STD_NIST, }, { .name = "md5", @@ -1662,6 +1690,7 @@ static struct cc_hash_template driver_hash[] = { .mac_name = "hmac(md5)", .mac_driver_name = "hmac-md5-ccree", .blocksize = MD5_HMAC_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_hash_update, @@ -1680,11 +1709,38 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_HASH_HW_MD5, .inter_digestsize = MD5_DIGEST_SIZE, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, + }, + { + .name = "sm3", + .driver_name = "sm3-ccree", + .blocksize = SM3_BLOCK_SIZE, + .is_mac = false, + .template_ahash = { + .init = cc_hash_init, + .update = cc_hash_update, + .final = cc_hash_final, + .finup = cc_hash_finup, + .digest = cc_hash_digest, + .export = cc_hash_export, + .import = cc_hash_import, + .setkey = cc_hash_setkey, + .halg = { + .digestsize = SM3_DIGEST_SIZE, + .statesize = CC_STATE_SIZE(SM3_DIGEST_SIZE), + }, + }, + .hash_mode = DRV_HASH_SM3, + .hw_mode = DRV_HASH_HW_SM3, + .inter_digestsize = SM3_DIGEST_SIZE, + .min_hw_rev = CC_HW_REV_713, + .std_body = CC_STD_OSCCA, }, { .mac_name = "xcbc(aes)", .mac_driver_name = "xcbc-aes-ccree", .blocksize = AES_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_mac_update, @@ -1703,11 +1759,13 @@ static struct cc_hash_template driver_hash[] = { .hw_mode = DRV_CIPHER_XCBC_MAC, .inter_digestsize = AES_BLOCK_SIZE, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, { .mac_name = "cmac(aes)", .mac_driver_name = "cmac-aes-ccree", .blocksize = AES_BLOCK_SIZE, + .is_mac = true, .template_ahash = { .init = cc_hash_init, .update = cc_mac_update, @@ -1726,6 +1784,7 @@ static 
struct cc_hash_template driver_hash[] = { .hw_mode = DRV_CIPHER_CMAC, .inter_digestsize = AES_BLOCK_SIZE, .min_hw_rev = CC_HW_REV_630, + .std_body = CC_STD_NIST, }, }; @@ -1780,6 +1839,7 @@ int cc_init_hash_sram(struct cc_drvdata *drvdata) unsigned int larval_seq_len = 0; struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712); + bool sm3_supported = (drvdata->hw_rev >= CC_HW_REV_713); int rc = 0; /* Copy-to-sram digest-len */ @@ -1845,6 +1905,17 @@ int cc_init_hash_sram(struct cc_drvdata *drvdata) sram_buff_ofs += sizeof(sha256_init); larval_seq_len = 0; + if (sm3_supported) { + cc_set_sram_desc(sm3_init, sram_buff_ofs, + ARRAY_SIZE(sm3_init), larval_seq, + &larval_seq_len); + rc = send_request_init(drvdata, larval_seq, larval_seq_len); + if (rc) + goto init_digest_const_err; + sram_buff_ofs += sizeof(sm3_init); + larval_seq_len = 0; + } + if (large_sha_supported) { cc_set_sram_desc((u32 *)sha384_init, sram_buff_ofs, (ARRAY_SIZE(sha384_init) * 2), larval_seq, @@ -1911,6 +1982,9 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) sizeof(sha224_init) + sizeof(sha256_init); + if (drvdata->hw_rev >= CC_HW_REV_713) + sram_size_to_alloc += sizeof(sm3_init); + if (drvdata->hw_rev >= CC_HW_REV_712) sram_size_to_alloc += sizeof(digest_len_sha512_init) + sizeof(sha384_init) + sizeof(sha512_init); @@ -1937,30 +2011,33 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) struct cc_hash_alg *t_alg; int hw_mode = driver_hash[alg].hw_mode; - /* We either support both HASH and MAC or none */ - if (driver_hash[alg].min_hw_rev > drvdata->hw_rev) + /* Check that the HW revision and variants are suitable */ + if ((driver_hash[alg].min_hw_rev > drvdata->hw_rev) || + !(drvdata->std_bodies & driver_hash[alg].std_body)) continue; - /* register hmac version */ - t_alg = cc_alloc_hash_alg(&driver_hash[alg], dev, true); - if (IS_ERR(t_alg)) { - rc = PTR_ERR(t_alg); - dev_err(dev, "%s alg allocation failed\n", - 
driver_hash[alg].driver_name); - goto fail; - } - t_alg->drvdata = drvdata; + if (driver_hash[alg].is_mac) { + /* register hmac version */ + t_alg = cc_alloc_hash_alg(&driver_hash[alg], dev, true); + if (IS_ERR(t_alg)) { + rc = PTR_ERR(t_alg); + dev_err(dev, "%s alg allocation failed\n", + driver_hash[alg].driver_name); + goto fail; + } + t_alg->drvdata = drvdata; - rc = crypto_register_ahash(&t_alg->ahash_alg); - if (rc) { - dev_err(dev, "%s alg registration failed\n", - driver_hash[alg].driver_name); - kfree(t_alg); - goto fail; - } else { - list_add_tail(&t_alg->entry, &hash_handle->hash_list); + rc = crypto_register_ahash(&t_alg->ahash_alg); + if (rc) { + dev_err(dev, "%s alg registration failed\n", + driver_hash[alg].driver_name); + kfree(t_alg); + goto fail; + } else { + list_add_tail(&t_alg->entry, + &hash_handle->hash_list); + } } - if (hw_mode == DRV_CIPHER_XCBC_MAC || hw_mode == DRV_CIPHER_CMAC) continue; @@ -2027,7 +2104,7 @@ static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[], XCBC_MAC_K1_OFFSET), CC_AES_128_BIT_KEY_SIZE, NS_BIT); set_setup_mode(&desc[idx], SETUP_LOAD_KEY0); - set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC); + set_hash_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC, ctx->hash_mode); set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT); set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE); set_flow_mode(&desc[idx], S_DIN_to_AES); @@ -2162,6 +2239,8 @@ static const void *cc_larval_digest(struct device *dev, u32 mode) return sha384_init; case DRV_HASH_SHA512: return sha512_init; + case DRV_HASH_SM3: + return sm3_init; default: dev_err(dev, "Invalid hash mode (%d)\n", mode); return md5_init; @@ -2182,6 +2261,8 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; struct device *dev = drvdata_to_dev(_drvdata); + bool sm3_supported = (_drvdata->hw_rev >= CC_HW_REV_713); + 
cc_sram_addr_t addr; switch (mode) { case DRV_HASH_NULL: @@ -2200,19 +2281,31 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) sizeof(md5_init) + sizeof(sha1_init) + sizeof(sha224_init)); - case DRV_HASH_SHA384: + case DRV_HASH_SM3: return (hash_handle->larval_digest_sram_addr + sizeof(md5_init) + sizeof(sha1_init) + sizeof(sha224_init) + sizeof(sha256_init)); + case DRV_HASH_SHA384: + addr = (hash_handle->larval_digest_sram_addr + + sizeof(md5_init) + + sizeof(sha1_init) + + sizeof(sha224_init) + + sizeof(sha256_init)); + if (sm3_supported) + addr += sizeof(sm3_init); + return addr; case DRV_HASH_SHA512: - return (hash_handle->larval_digest_sram_addr + + addr = (hash_handle->larval_digest_sram_addr + sizeof(md5_init) + sizeof(sha1_init) + sizeof(sha224_init) + sizeof(sha256_init) + sizeof(sha384_init)); + if (sm3_supported) + addr += sizeof(sm3_init); + return addr; default: dev_err(dev, "Invalid hash mode (%d)\n", mode); } diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 45985b955d2c..7a9b90db7db7 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -42,6 +42,7 @@ #define WORD3_QUEUE_LAST_IND CC_GENMASK(3, QUEUE_LAST_IND) #define WORD4_ACK_NEEDED CC_GENMASK(4, ACK_NEEDED) #define WORD4_AES_SEL_N_HASH CC_GENMASK(4, AES_SEL_N_HASH) +#define WORD4_AES_XOR_CRYPTO_KEY CC_GENMASK(4, AES_XOR_CRYPTO_KEY) #define WORD4_BYTES_SWAP CC_GENMASK(4, BYTES_SWAP) #define WORD4_CIPHER_CONF0 CC_GENMASK(4, CIPHER_CONF0) #define WORD4_CIPHER_CONF1 CC_GENMASK(4, CIPHER_CONF1) @@ -107,6 +108,7 @@ enum cc_flow_mode { AES_to_AES_to_HASH_and_DOUT = 13, AES_to_AES_to_HASH = 14, AES_to_HASH_and_AES = 15, + DIN_SM4_DOUT = 16, DIN_AES_AESMAC = 17, HASH_to_DOUT = 18, /* setup flows */ @@ -114,9 +116,11 @@ enum cc_flow_mode { S_DIN_to_AES2 = 33, S_DIN_to_DES = 34, S_DIN_to_RC4 = 35, + S_DIN_to_SM4 = 36, S_DIN_to_HASH = 37, S_AES_to_DOUT = 38, S_AES2_to_DOUT = 39, + S_SM4_to_DOUT 
= 40, S_RC4_to_DOUT = 41, S_DES_to_DOUT = 42, S_HASH_to_DOUT = 43, @@ -393,6 +397,16 @@ static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc) pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1); } +/* + * Set the AES XOR crypto key bit; in some scenarios this selects the SM3 engine + * + * @pdesc: pointer HW descriptor struct + */ +static inline void set_aes_xor_crypto_key(struct cc_hw_desc *pdesc) +{ + pdesc->word[4] |= FIELD_PREP(WORD4_AES_XOR_CRYPTO_KEY, 1); +} + /* * Set the DOUT field of a HW descriptors to SRAM mode * Note: No need to check SRAM alignment since host requests do not use SRAM and @@ -454,6 +468,22 @@ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } +/* + * Set the cipher mode for hash algorithms. + * + * @pdesc: pointer HW descriptor struct + * @cipher_mode: Any one of the modes defined in [CC7x-DESC] + * @hash_mode: specifies which hash is being handled + */ +static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, + enum drv_cipher_mode cipher_mode, + enum drv_hash_mode hash_mode) +{ + set_cipher_mode(pdesc, cipher_mode); + if (hash_mode == DRV_HASH_SM3) + set_aes_xor_crypto_key(pdesc); +} + /* * Set the cipher configuration fields.
* diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index db203f8be429..bcef76508dfa 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -123,7 +123,7 @@ static inline struct chcr_authenc_ctx *AUTHENC_CTX(struct chcr_aead_ctx *gctx) static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx) { - return ctx->dev->u_ctx; + return container_of(ctx->dev, struct uld_ctx, dev); } static inline int is_ofld_imm(const struct sk_buff *skb) @@ -198,18 +198,43 @@ void chcr_verify_tag(struct aead_request *req, u8 *input, int *err) *err = 0; } -static inline void chcr_handle_aead_resp(struct aead_request *req, +static int chcr_inc_wrcount(struct chcr_dev *dev) +{ + int err = 0; + + spin_lock_bh(&dev->lock_chcr_dev); + if (dev->state == CHCR_DETACH) + err = 1; + else + atomic_inc(&dev->inflight); + + spin_unlock_bh(&dev->lock_chcr_dev); + + return err; +} + +static inline void chcr_dec_wrcount(struct chcr_dev *dev) +{ + atomic_dec(&dev->inflight); +} + +static inline int chcr_handle_aead_resp(struct aead_request *req, unsigned char *input, int err) { struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_dev *dev = a_ctx(tfm)->dev; chcr_aead_common_exit(req); if (reqctx->verify == VERIFY_SW) { chcr_verify_tag(req, input, &err); reqctx->verify = VERIFY_HW; } + chcr_dec_wrcount(dev); req->base.complete(&req->base, err); + + return err; } static void get_aes_decrypt_key(unsigned char *dec_key, @@ -391,7 +416,7 @@ static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid, static inline void dsgl_walk_add_page(struct dsgl_walk *walk, size_t size, - dma_addr_t *addr) + dma_addr_t addr) { int j; @@ -399,7 +424,7 @@ static inline void dsgl_walk_add_page(struct dsgl_walk *walk, return; j = walk->nents; walk->to->len[j % 8] = htons(size); - walk->to->addr[j % 8] = cpu_to_be64(*addr); + walk->to->addr[j % 8] = 
cpu_to_be64(addr); j++; if ((j % 8) == 0) walk->to++; @@ -473,16 +498,16 @@ static inline void ulptx_walk_end(struct ulptx_walk *walk) static inline void ulptx_walk_add_page(struct ulptx_walk *walk, size_t size, - dma_addr_t *addr) + dma_addr_t addr) { if (!size) return; if (walk->nents == 0) { walk->sgl->len0 = cpu_to_be32(size); - walk->sgl->addr0 = cpu_to_be64(*addr); + walk->sgl->addr0 = cpu_to_be64(addr); } else { - walk->pair->addr[walk->pair_idx] = cpu_to_be64(*addr); + walk->pair->addr[walk->pair_idx] = cpu_to_be64(addr); walk->pair->len[walk->pair_idx] = cpu_to_be32(size); walk->pair_idx = !walk->pair_idx; if (!walk->pair_idx) @@ -717,7 +742,7 @@ static inline void create_wreq(struct chcr_context *ctx, htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(len16, 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, + FILL_WR_RX_Q_ID(ctx->tx_chan_id, qid, !!lcb, ctx->tx_qidx); chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, @@ -773,7 +798,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) } chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->dev->rx_channel_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->tx_chan_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(IV + wrparam->bytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1100,6 +1125,7 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req, struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input; struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req); struct cipher_wr_param wrparam; + struct chcr_dev *dev = c_ctx(tfm)->dev; int bytes; if (err) @@ -1161,6 +1187,7 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req, unmap: chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); complete: + chcr_dec_wrcount(dev); req->base.complete(&req->base, err); return err; 
} @@ -1187,7 +1214,10 @@ static int process_cipher(struct ablkcipher_request *req, ablkctx->enckey_len, req->nbytes, ivsize); goto error; } - chcr_cipher_dma_map(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); + + err = chcr_cipher_dma_map(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); + if (err) + goto error; if (req->nbytes < (SGE_MAX_WR_LEN - (sizeof(struct chcr_wr) + AES_MIN_KEY_SIZE + sizeof(struct cpl_rx_phys_dsgl) + @@ -1276,15 +1306,21 @@ error: static int chcr_aes_encrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; int err, isfull = 0; struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); + err = chcr_inc_wrcount(dev); + if (err) + return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], c_ctx(tfm)->tx_qidx))) { isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -ENOSPC; + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + err = -ENOSPC; + goto error; + } } err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], @@ -1295,15 +1331,23 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req) set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); chcr_send_wr(skb); return isfull ? 
-EBUSY : -EINPROGRESS; +error: + chcr_dec_wrcount(dev); + return err; } static int chcr_aes_decrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); + struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; int err, isfull = 0; + err = chcr_inc_wrcount(dev); + if (err) + return -ENXIO; + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], c_ctx(tfm)->tx_qidx))) { isfull = 1; @@ -1311,8 +1355,8 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req) return -ENOSPC; } - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], - &skb, CHCR_DECRYPT_OP); + err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + &skb, CHCR_DECRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; @@ -1333,10 +1377,11 @@ static int chcr_device_init(struct chcr_context *ctx) if (!ctx->dev) { u_ctx = assign_chcr_device(); if (!u_ctx) { + err = -ENXIO; pr_err("chcr device assignment fails\n"); goto out; } - ctx->dev = u_ctx->dev; + ctx->dev = &u_ctx->dev; adap = padap(ctx->dev); ntxq = u_ctx->lldi.ntxq; rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; @@ -1344,7 +1389,6 @@ static int chcr_device_init(struct chcr_context *ctx) spin_lock(&ctx->dev->lock_chcr_dev); ctx->tx_chan_id = ctx->dev->tx_channel_id; ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; - ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); rxq_idx = ctx->tx_chan_id * rxq_perchan; rxq_idx += id % rxq_perchan; @@ -1498,7 +1542,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->dev->rx_channel_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->tx_chan_id, 2, 0); chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1562,6 +1606,7 @@ static int 
chcr_ahash_update(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct uld_ctx *u_ctx = NULL; + struct chcr_dev *dev = h_ctx(rtfm)->dev; struct sk_buff *skb; u8 remainder = 0, bs; unsigned int nbytes = req->nbytes; @@ -1570,12 +1615,6 @@ static int chcr_ahash_update(struct ahash_request *req) bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); u_ctx = ULD_CTX(h_ctx(rtfm)); - if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -ENOSPC; - } if (nbytes + req_ctx->reqlen >= bs) { remainder = (nbytes + req_ctx->reqlen) % bs; @@ -1586,10 +1625,27 @@ static int chcr_ahash_update(struct ahash_request *req) req_ctx->reqlen += nbytes; return 0; } + error = chcr_inc_wrcount(dev); + if (error) + return -ENXIO; + /* Detach state for CHCR means lldi or padap is freed. Increasing + * inflight count for dev guarantees that lldi and padap is valid + */ + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + h_ctx(rtfm)->tx_qidx))) { + isfull = 1; + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + error = -ENOSPC; + goto err; + } + } + chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); - if (error) - return -ENOMEM; + if (error) { + error = -ENOMEM; + goto err; + } get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); params.sg_len = chcr_hash_ent_in_wr(req->src, !!req_ctx->reqlen, @@ -1629,6 +1685,8 @@ static int chcr_ahash_update(struct ahash_request *req) return isfull ?
-EBUSY : -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); +err: + chcr_dec_wrcount(dev); return error; } @@ -1646,10 +1704,16 @@ static int chcr_ahash_final(struct ahash_request *req) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_dev *dev = h_ctx(rtfm)->dev; struct hash_wr_param params; struct sk_buff *skb; struct uld_ctx *u_ctx = NULL; u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + int error = -EINVAL; + + error = chcr_inc_wrcount(dev); + if (error) + return -ENXIO; chcr_init_hctx_per_wr(req_ctx); u_ctx = ULD_CTX(h_ctx(rtfm)); @@ -1686,19 +1750,25 @@ static int chcr_ahash_final(struct ahash_request *req) } params.hash_size = crypto_ahash_digestsize(rtfm); skb = create_hash_wr(req, &params); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + error = PTR_ERR(skb); + goto err; + } req_ctx->reqlen = 0; skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); chcr_send_wr(skb); return -EINPROGRESS; +err: + chcr_dec_wrcount(dev); + return error; } static int chcr_ahash_finup(struct ahash_request *req) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_dev *dev = h_ctx(rtfm)->dev; struct uld_ctx *u_ctx = NULL; struct sk_buff *skb; struct hash_wr_param params; @@ -1707,17 +1777,24 @@ static int chcr_ahash_finup(struct ahash_request *req) bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); u_ctx = ULD_CTX(h_ctx(rtfm)); + error = chcr_inc_wrcount(dev); + if (error) + return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], h_ctx(rtfm)->tx_qidx))) { isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -ENOSPC; + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + error = -ENOSPC; + goto err; + } } chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); - if
(error) - return -ENOMEM; + if (error) { + error = -ENOMEM; + goto err; + } get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); @@ -1774,6 +1851,8 @@ static int chcr_ahash_finup(struct ahash_request *req) return isfull ? -EBUSY : -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); +err: + chcr_dec_wrcount(dev); return error; } @@ -1781,6 +1860,7 @@ static int chcr_ahash_digest(struct ahash_request *req) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_dev *dev = h_ctx(rtfm)->dev; struct uld_ctx *u_ctx = NULL; struct sk_buff *skb; struct hash_wr_param params; @@ -1789,19 +1869,26 @@ static int chcr_ahash_digest(struct ahash_request *req) rtfm->init(req); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + error = chcr_inc_wrcount(dev); + if (error) + return -ENXIO; u_ctx = ULD_CTX(h_ctx(rtfm)); if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], h_ctx(rtfm)->tx_qidx))) { isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -ENOSPC; + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + error = -ENOSPC; + goto err; + } } chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); - if (error) - return -ENOMEM; + if (error) { + error = -ENOMEM; + goto err; + } get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); @@ -1854,6 +1941,8 @@ static int chcr_ahash_digest(struct ahash_request *req) return isfull ?
-EBUSY : -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); +err: + chcr_dec_wrcount(dev); return error; } @@ -1925,6 +2014,7 @@ static inline void chcr_handle_ahash_resp(struct ahash_request *req, int digestsize, updated_digestsize; struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm)); + struct chcr_dev *dev = h_ctx(tfm)->dev; if (input == NULL) goto out; @@ -1967,6 +2057,7 @@ unmap: out: + chcr_dec_wrcount(dev); req->base.complete(&req->base, err); } @@ -1983,14 +2074,13 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_AEAD: - chcr_handle_aead_resp(aead_request_cast(req), input, err); + err = chcr_handle_aead_resp(aead_request_cast(req), input, err); break; case CRYPTO_ALG_TYPE_ABLKCIPHER: - err = chcr_handle_cipher_resp(ablkcipher_request_cast(req), + chcr_handle_cipher_resp(ablkcipher_request_cast(req), input, err); break; - case CRYPTO_ALG_TYPE_AHASH: chcr_handle_ahash_resp(ahash_request_cast(req), input, err); } @@ -2008,7 +2098,7 @@ static int chcr_ahash_export(struct ahash_request *areq, void *out) memcpy(state->partial_hash, req_ctx->partial_hash, CHCR_HASH_MAX_DIGEST_SIZE); chcr_init_hctx_per_wr(state); - return 0; + return 0; } static int chcr_ahash_import(struct ahash_request *areq, const void *in) @@ -2215,10 +2305,7 @@ static int chcr_aead_common_init(struct aead_request *req) error = -ENOMEM; goto err; } - reqctx->aad_nents = sg_nents_xlen(req->src, req->assoclen, - CHCR_SRC_SG_SIZE, 0); - reqctx->src_nents = sg_nents_xlen(req->src, req->cryptlen, - CHCR_SRC_SG_SIZE, req->assoclen); + return 0; err: return error; @@ -2249,7 +2336,7 @@ static int chcr_aead_fallback(struct aead_request *req, unsigned short op_type) req->base.complete, req->base.data); aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); - aead_request_set_ad(subreq, req->assoclen); + 
aead_request_set_ad(subreq, req->assoclen); return op_type ? crypto_aead_decrypt(subreq) : crypto_aead_encrypt(subreq); } @@ -2268,10 +2355,10 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, struct ulptx_sgl *ulptx; unsigned int transhdr_len; unsigned int dst_size = 0, temp, subtype = get_aead_subtype(tfm); - unsigned int kctx_len = 0, dnents; - unsigned int assoclen = req->assoclen; + unsigned int kctx_len = 0, dnents, snents; unsigned int authsize = crypto_aead_authsize(tfm); int error = -EINVAL; + u8 *ivptr; int null = 0; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -2288,24 +2375,20 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, if (subtype == CRYPTO_ALG_SUB_TYPE_CBC_NULL || subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) { null = 1; - assoclen = 0; - reqctx->aad_nents = 0; } - dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0); - dnents += sg_nents_xlen(req->dst, req->cryptlen + - (reqctx->op ? -authsize : authsize), CHCR_DST_SG_SIZE, - req->assoclen); + dnents = sg_nents_xlen(req->dst, req->assoclen + req->cryptlen + + (reqctx->op ? -authsize : authsize), CHCR_DST_SG_SIZE, 0); dnents += MIN_AUTH_SG; // For IV - + snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, + CHCR_SRC_SG_SIZE, 0); dst_size = get_space_for_phys_dsgl(dnents); kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); - reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen) < + reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) < SGE_MAX_WR_LEN; - temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen, 16) - : (sgl_len(reqctx->src_nents + reqctx->aad_nents - + MIN_GCM_SG) * 8); + temp = reqctx->imm ? 
roundup(req->assoclen + req->cryptlen, 16) + : (sgl_len(snents) * 8); transhdr_len += temp; transhdr_len = roundup(transhdr_len, 16); @@ -2315,7 +2398,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, chcr_aead_common_exit(req); return ERR_PTR(chcr_aead_fallback(req, reqctx->op)); } - skb = alloc_skb(SGE_MAX_WR_LEN, flags); + skb = alloc_skb(transhdr_len, flags); if (!skb) { error = -ENOMEM; goto err; @@ -2331,16 +2414,16 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->dev->rx_channel_id, 2, - assoclen + 1); - chcr_req->sec_cpl.pldlen = htonl(assoclen + IV + req->cryptlen); + FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->tx_chan_id, 2, 1); + chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( - assoclen ? 1 : 0, assoclen, - assoclen + IV + 1, + null ? 0 : 1 + IV, + null ? 0 : IV + req->assoclen, + req->assoclen + IV + 1, (temp & 0x1F0) >> 4); chcr_req->sec_cpl.cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT( temp & 0xF, - null ? 0 : assoclen + IV + 1, + null ? 
0 : req->assoclen + IV + 1, temp, temp); if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL || subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA) @@ -2367,23 +2450,24 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, memcpy(chcr_req->key_ctx.key + roundup(aeadctx->enckey_len, 16), actx->h_iopad, kctx_len - roundup(aeadctx->enckey_len, 16)); + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + ivptr = (u8 *)(phys_cpl + 1) + dst_size; + ulptx = (struct ulptx_sgl *)(ivptr + IV); if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA || subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) { - memcpy(reqctx->iv, aeadctx->nonce, CTR_RFC3686_NONCE_SIZE); - memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->iv, + memcpy(ivptr, aeadctx->nonce, CTR_RFC3686_NONCE_SIZE); + memcpy(ivptr + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE); - *(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE + + *(__be32 *)(ivptr + CTR_RFC3686_NONCE_SIZE + CTR_RFC3686_IV_SIZE) = cpu_to_be32(1); } else { - memcpy(reqctx->iv, req->iv, IV); + memcpy(ivptr, req->iv, IV); } - phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); - ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size); - chcr_add_aead_dst_ent(req, phys_cpl, assoclen, qid); - chcr_add_aead_src_ent(req, ulptx, assoclen); + chcr_add_aead_dst_ent(req, phys_cpl, qid); + chcr_add_aead_src_ent(req, ulptx); atomic_inc(&adap->chcr_stats.cipher_rqst); - temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + - kctx_len + (reqctx->imm ? (assoclen + IV + req->cryptlen) : 0); + temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + IV + + kctx_len + (reqctx->imm ? 
(req->assoclen + req->cryptlen) : 0); create_wreq(a_ctx(tfm), chcr_req, &req->base, reqctx->imm, size, transhdr_len, temp, 0); reqctx->skb = skb; @@ -2470,8 +2554,7 @@ void chcr_aead_dma_unmap(struct device *dev, } void chcr_add_aead_src_ent(struct aead_request *req, - struct ulptx_sgl *ulptx, - unsigned int assoclen) + struct ulptx_sgl *ulptx) { struct ulptx_walk ulp_walk; struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); @@ -2484,28 +2567,20 @@ void chcr_add_aead_src_ent(struct aead_request *req, buf += reqctx->b0_len; } sg_pcopy_to_buffer(req->src, sg_nents(req->src), - buf, assoclen, 0); - buf += assoclen; - memcpy(buf, reqctx->iv, IV); - buf += IV; - sg_pcopy_to_buffer(req->src, sg_nents(req->src), - buf, req->cryptlen, req->assoclen); + buf, req->cryptlen + req->assoclen, 0); } else { ulptx_walk_init(&ulp_walk, ulptx); if (reqctx->b0_len) ulptx_walk_add_page(&ulp_walk, reqctx->b0_len, - &reqctx->b0_dma); - ulptx_walk_add_sg(&ulp_walk, req->src, assoclen, 0); - ulptx_walk_add_page(&ulp_walk, IV, &reqctx->iv_dma); - ulptx_walk_add_sg(&ulp_walk, req->src, req->cryptlen, - req->assoclen); + reqctx->b0_dma); + ulptx_walk_add_sg(&ulp_walk, req->src, req->cryptlen + + req->assoclen, 0); ulptx_walk_end(&ulp_walk); } } void chcr_add_aead_dst_ent(struct aead_request *req, struct cpl_rx_phys_dsgl *phys_cpl, - unsigned int assoclen, unsigned short qid) { struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); @@ -2516,12 +2591,10 @@ void chcr_add_aead_dst_ent(struct aead_request *req, u32 temp; dsgl_walk_init(&dsgl_walk, phys_cpl); - if (reqctx->b0_len) - dsgl_walk_add_page(&dsgl_walk, reqctx->b0_len, &reqctx->b0_dma); - dsgl_walk_add_sg(&dsgl_walk, req->dst, assoclen, 0); - dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma); - temp = req->cryptlen + (reqctx->op ? 
-authsize : authsize); - dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, req->assoclen); + dsgl_walk_add_page(&dsgl_walk, IV + reqctx->b0_len, reqctx->iv_dma); + temp = req->assoclen + req->cryptlen + + (reqctx->op ? -authsize : authsize); + dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, 0); dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); } @@ -2589,7 +2662,7 @@ void chcr_add_hash_src_ent(struct ahash_request *req, ulptx_walk_init(&ulp_walk, ulptx); if (param->bfr_len) ulptx_walk_add_page(&ulp_walk, param->bfr_len, - &reqctx->hctx_wr.dma_addr); + reqctx->hctx_wr.dma_addr); ulptx_walk_add_sg(&ulp_walk, reqctx->hctx_wr.srcsg, param->sg_len, reqctx->hctx_wr.src_ofst); reqctx->hctx_wr.srcsg = ulp_walk.last_sg; @@ -2689,8 +2762,7 @@ static int set_msg_len(u8 *block, unsigned int msglen, int csize) return 0; } -static void generate_b0(struct aead_request *req, - struct chcr_aead_ctx *aeadctx, +static void generate_b0(struct aead_request *req, u8 *ivptr, unsigned short op_type) { unsigned int l, lp, m; @@ -2701,7 +2773,7 @@ static void generate_b0(struct aead_request *req, m = crypto_aead_authsize(aead); - memcpy(b0, reqctx->iv, 16); + memcpy(b0, ivptr, 16); lp = b0[0]; l = lp + 1; @@ -2727,29 +2799,31 @@ static inline int crypto_ccm_check_iv(const u8 *iv) } static int ccm_format_packet(struct aead_request *req, - struct chcr_aead_ctx *aeadctx, + u8 *ivptr, unsigned int sub_type, unsigned short op_type, unsigned int assoclen) { struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); int rc = 0; if (sub_type == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) { - reqctx->iv[0] = 3; - memcpy(reqctx->iv + 1, &aeadctx->salt[0], 3); - memcpy(reqctx->iv + 4, req->iv, 8); - memset(reqctx->iv + 12, 0, 4); + ivptr[0] = 3; + memcpy(ivptr + 1, &aeadctx->salt[0], 3); + memcpy(ivptr + 4, req->iv, 8); + memset(ivptr + 12, 0, 4); } else { - memcpy(reqctx->iv, req->iv, 16); + memcpy(ivptr, 
req->iv, 16); } if (assoclen) *((unsigned short *)(reqctx->scratch_pad + 16)) = htons(assoclen); - generate_b0(req, aeadctx, op_type); + generate_b0(req, ivptr, op_type); /* zero the ctr value */ - memset(reqctx->iv + 15 - reqctx->iv[0], 0, reqctx->iv[0] + 1); + memset(ivptr + 15 - ivptr[0], 0, ivptr[0] + 1); return rc; } @@ -2762,7 +2836,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = a_ctx(tfm)->dev->rx_channel_id; + unsigned int c_id = a_ctx(tfm)->tx_chan_id; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned int assoclen; @@ -2775,7 +2849,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, ((assoclen) ? CCM_AAD_FIELD_SIZE : 0); auth_offset = req->cryptlen ? - (assoclen + IV + 1 + ccm_xtra) : 0; + (req->assoclen + IV + 1 + ccm_xtra) : 0; if (op_type == CHCR_DECRYPT_OP) { if (crypto_aead_authsize(tfm) != req->cryptlen) tag_offset = crypto_aead_authsize(tfm); @@ -2785,13 +2859,13 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, - 2, assoclen + 1 + ccm_xtra); + 2, 1); sec_cpl->pldlen = - htonl(assoclen + IV + req->cryptlen + ccm_xtra); + htonl(req->assoclen + IV + req->cryptlen + ccm_xtra); /* For CCM there wil be b0 always. 
So AAD start will be 1 always */ sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( - 1, assoclen + ccm_xtra, assoclen - + IV + 1 + ccm_xtra, 0); + 1 + IV, IV + assoclen + ccm_xtra, + req->assoclen + IV + 1 + ccm_xtra, 0); sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, auth_offset, tag_offset, @@ -2838,10 +2912,11 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, struct cpl_rx_phys_dsgl *phys_cpl; struct ulptx_sgl *ulptx; unsigned int transhdr_len; - unsigned int dst_size = 0, kctx_len, dnents, temp; + unsigned int dst_size = 0, kctx_len, dnents, temp, snents; unsigned int sub_type, assoclen = req->assoclen; unsigned int authsize = crypto_aead_authsize(tfm); int error = -EINVAL; + u8 *ivptr; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; struct adapter *adap = padap(a_ctx(tfm)->dev); @@ -2857,37 +2932,38 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, error = aead_ccm_validate_input(reqctx->op, req, aeadctx, sub_type); if (error) goto err; - dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0); - dnents += sg_nents_xlen(req->dst, req->cryptlen + dnents = sg_nents_xlen(req->dst, req->assoclen + req->cryptlen + (reqctx->op ? -authsize : authsize), - CHCR_DST_SG_SIZE, req->assoclen); + CHCR_DST_SG_SIZE, 0); dnents += MIN_CCM_SG; // For IV and B0 dst_size = get_space_for_phys_dsgl(dnents); + snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, + CHCR_SRC_SG_SIZE, 0); + snents += MIN_CCM_SG; //For B0 kctx_len = roundup(aeadctx->enckey_len, 16) * 2; transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); - reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen + + reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen + reqctx->b0_len) <= SGE_MAX_WR_LEN; - temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen + + temp = reqctx->imm ? 
roundup(req->assoclen + req->cryptlen + reqctx->b0_len, 16) : - (sgl_len(reqctx->src_nents + reqctx->aad_nents + - MIN_CCM_SG) * 8); + (sgl_len(snents) * 8); transhdr_len += temp; transhdr_len = roundup(transhdr_len, 16); if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE - - reqctx->b0_len, transhdr_len, reqctx->op)) { + reqctx->b0_len, transhdr_len, reqctx->op)) { atomic_inc(&adap->chcr_stats.fallback); chcr_aead_common_exit(req); return ERR_PTR(chcr_aead_fallback(req, reqctx->op)); } - skb = alloc_skb(SGE_MAX_WR_LEN, flags); + skb = alloc_skb(transhdr_len, flags); if (!skb) { error = -ENOMEM; goto err; } - chcr_req = (struct chcr_wr *) __skb_put_zero(skb, transhdr_len); + chcr_req = __skb_put_zero(skb, transhdr_len); fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, reqctx->op); @@ -2897,16 +2973,17 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, aeadctx->key, aeadctx->enckey_len); phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); - ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size); - error = ccm_format_packet(req, aeadctx, sub_type, reqctx->op, assoclen); + ivptr = (u8 *)(phys_cpl + 1) + dst_size; + ulptx = (struct ulptx_sgl *)(ivptr + IV); + error = ccm_format_packet(req, ivptr, sub_type, reqctx->op, assoclen); if (error) goto dstmap_fail; - chcr_add_aead_dst_ent(req, phys_cpl, assoclen, qid); - chcr_add_aead_src_ent(req, ulptx, assoclen); + chcr_add_aead_dst_ent(req, phys_cpl, qid); + chcr_add_aead_src_ent(req, ulptx); atomic_inc(&adap->chcr_stats.aead_rqst); - temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + - kctx_len + (reqctx->imm ? (assoclen + IV + req->cryptlen + + temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + IV + + kctx_len + (reqctx->imm ? 
(req->assoclen + req->cryptlen + reqctx->b0_len) : 0); create_wreq(a_ctx(tfm), chcr_req, &req->base, reqctx->imm, 0, transhdr_len, temp, 0); @@ -2931,10 +3008,11 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; struct ulptx_sgl *ulptx; - unsigned int transhdr_len, dnents = 0; + unsigned int transhdr_len, dnents = 0, snents; unsigned int dst_size = 0, temp = 0, kctx_len, assoclen = req->assoclen; unsigned int authsize = crypto_aead_authsize(tfm); int error = -EINVAL; + u8 *ivptr; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; struct adapter *adap = padap(a_ctx(tfm)->dev); @@ -2946,19 +3024,19 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, error = chcr_aead_common_init(req); if (error) return ERR_PTR(error); - dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0); - dnents += sg_nents_xlen(req->dst, req->cryptlen + + dnents = sg_nents_xlen(req->dst, req->assoclen + req->cryptlen + (reqctx->op ? -authsize : authsize), - CHCR_DST_SG_SIZE, req->assoclen); + CHCR_DST_SG_SIZE, 0); + snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, + CHCR_SRC_SG_SIZE, 0); dnents += MIN_GCM_SG; // For IV dst_size = get_space_for_phys_dsgl(dnents); kctx_len = roundup(aeadctx->enckey_len, 16) + AEAD_H_SIZE; transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); - reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen) <= + reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) <= SGE_MAX_WR_LEN; - temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen, 16) : - (sgl_len(reqctx->src_nents + - reqctx->aad_nents + MIN_GCM_SG) * 8); + temp = reqctx->imm ? 
roundup(req->assoclen + req->cryptlen, 16) : + (sgl_len(snents) * 8); transhdr_len += temp; transhdr_len = roundup(transhdr_len, 16); if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE, @@ -2968,7 +3046,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, chcr_aead_common_exit(req); return ERR_PTR(chcr_aead_fallback(req, reqctx->op)); } - skb = alloc_skb(SGE_MAX_WR_LEN, flags); + skb = alloc_skb(transhdr_len, flags); if (!skb) { error = -ENOMEM; goto err; @@ -2979,15 +3057,15 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, //Offset of tag from end temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - a_ctx(tfm)->dev->rx_channel_id, 2, - (assoclen + 1)); + a_ctx(tfm)->tx_chan_id, 2, 1); chcr_req->sec_cpl.pldlen = - htonl(assoclen + IV + req->cryptlen); + htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( - assoclen ? 1 : 0, assoclen, - assoclen + IV + 1, 0); + assoclen ? 1 + IV : 0, + assoclen ? 
IV + assoclen : 0, + req->assoclen + IV + 1, 0); chcr_req->sec_cpl.cipherstop_lo_authinsert = - FILL_SEC_CPL_AUTHINSERT(0, assoclen + IV + 1, + FILL_SEC_CPL_AUTHINSERT(0, req->assoclen + IV + 1, temp, temp); chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(reqctx->op, (reqctx->op == @@ -3002,25 +3080,26 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, memcpy(chcr_req->key_ctx.key + roundup(aeadctx->enckey_len, 16), GCM_CTX(aeadctx)->ghash_h, AEAD_H_SIZE); + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + ivptr = (u8 *)(phys_cpl + 1) + dst_size; /* prepare a 16 byte iv */ /* S A L T | IV | 0x00000001 */ if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) { - memcpy(reqctx->iv, aeadctx->salt, 4); - memcpy(reqctx->iv + 4, req->iv, GCM_RFC4106_IV_SIZE); + memcpy(ivptr, aeadctx->salt, 4); + memcpy(ivptr + 4, req->iv, GCM_RFC4106_IV_SIZE); } else { - memcpy(reqctx->iv, req->iv, GCM_AES_IV_SIZE); + memcpy(ivptr, req->iv, GCM_AES_IV_SIZE); } - *((unsigned int *)(reqctx->iv + 12)) = htonl(0x01); + *((unsigned int *)(ivptr + 12)) = htonl(0x01); - phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); - ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size); + ulptx = (struct ulptx_sgl *)(ivptr + 16); - chcr_add_aead_dst_ent(req, phys_cpl, assoclen, qid); - chcr_add_aead_src_ent(req, ulptx, assoclen); + chcr_add_aead_dst_ent(req, phys_cpl, qid); + chcr_add_aead_src_ent(req, ulptx); atomic_inc(&adap->chcr_stats.aead_rqst); - temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + - kctx_len + (reqctx->imm ? (assoclen + IV + req->cryptlen) : 0); + temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + IV + + kctx_len + (reqctx->imm ? 
(req->assoclen + req->cryptlen) : 0); create_wreq(a_ctx(tfm), chcr_req, &req->base, reqctx->imm, size, transhdr_len, temp, reqctx->verify); reqctx->skb = skb; @@ -3118,12 +3197,12 @@ static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) aeadctx->mayverify = VERIFY_HW; break; case ICV_12: - aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; - aeadctx->mayverify = VERIFY_HW; + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; break; case ICV_14: - aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; - aeadctx->mayverify = VERIFY_HW; + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; break; case ICV_16: aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; @@ -3565,27 +3644,42 @@ static int chcr_aead_op(struct aead_request *req, create_wr_t create_wr_fn) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct uld_ctx *u_ctx; struct sk_buff *skb; int isfull = 0; + struct chcr_dev *cdev; - if (!a_ctx(tfm)->dev) { + cdev = a_ctx(tfm)->dev; + if (!cdev) { pr_err("chcr : %s : No crypto device.\n", __func__); return -ENXIO; } + + if (chcr_inc_wrcount(cdev)) { + /* Detach state for CHCR means lldi or padap is freed. + * We cannot increment fallback here. 
+ */ + return chcr_aead_fallback(req, reqctx->op); + } + u_ctx = ULD_CTX(a_ctx(tfm)); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], a_ctx(tfm)->tx_qidx)) { isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + chcr_dec_wrcount(cdev); return -ENOSPC; + } } /* Form a WR from req */ skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size); - if (IS_ERR(skb) || !skb) + if (IS_ERR(skb) || !skb) { + chcr_dec_wrcount(cdev); return PTR_ERR(skb); + } skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx); @@ -3722,7 +3816,6 @@ static struct chcr_alg_template driver_algs[] = { .setkey = chcr_aes_rfc3686_setkey, .encrypt = chcr_aes_encrypt, .decrypt = chcr_aes_decrypt, - .geniv = "seqiv", } } }, @@ -4178,7 +4271,6 @@ static struct chcr_alg_template driver_algs[] = { .setauthsize = chcr_authenc_null_setauthsize, } }, - }; /* diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h index 1871500309e2..ee20dd899e83 100644 --- a/drivers/crypto/chelsio/chcr_algo.h +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -262,7 +262,7 @@ #define MIN_AUTH_SG 1 /* IV */ #define MIN_GCM_SG 1 /* IV */ #define MIN_DIGEST_SG 1 /*Partial Buffer*/ -#define MIN_CCM_SG 2 /*IV+B0*/ +#define MIN_CCM_SG 1 /*IV+B0*/ #define CIP_SPACE_LEFT(len) \ ((SGE_MAX_WR_LEN - CIP_WR_MIN_LEN - (len))) #define HASH_SPACE_LEFT(len) \ diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 2c472e3c6aeb..239b933d6df6 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -26,10 +26,7 @@ #include "chcr_core.h" #include "cxgb4_uld.h" -static LIST_HEAD(uld_ctx_list); -static DEFINE_MUTEX(dev_mutex); -static atomic_t dev_count; -static struct uld_ctx *ctx_rr; +static struct chcr_driver_data drv_data; typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input); static int 
cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input); @@ -53,6 +50,29 @@ static struct cxgb4_uld_info chcr_uld_info = { #endif /* CONFIG_CHELSIO_IPSEC_INLINE */ }; +static void detach_work_fn(struct work_struct *work) +{ + struct chcr_dev *dev; + + dev = container_of(work, struct chcr_dev, detach_work.work); + + if (atomic_read(&dev->inflight)) { + dev->wqretry--; + if (dev->wqretry) { + pr_debug("Request Inflight Count %d\n", + atomic_read(&dev->inflight)); + + schedule_delayed_work(&dev->detach_work, WQ_DETACH_TM); + } else { + WARN(1, "CHCR:%d request Still Pending\n", + atomic_read(&dev->inflight)); + complete(&dev->detach_comp); + } + } else { + complete(&dev->detach_comp); + } +} + struct uld_ctx *assign_chcr_device(void) { struct uld_ctx *u_ctx = NULL; @@ -63,56 +83,74 @@ struct uld_ctx *assign_chcr_device(void) * Although One session must use the same device to * maintain request-response ordering. */ - mutex_lock(&dev_mutex); - if (!list_empty(&uld_ctx_list)) { - u_ctx = ctx_rr; - if (list_is_last(&ctx_rr->entry, &uld_ctx_list)) - ctx_rr = list_first_entry(&uld_ctx_list, - struct uld_ctx, - entry); + mutex_lock(&drv_data.drv_mutex); + if (!list_empty(&drv_data.act_dev)) { + u_ctx = drv_data.last_dev; + if (list_is_last(&drv_data.last_dev->entry, &drv_data.act_dev)) + drv_data.last_dev = list_first_entry(&drv_data.act_dev, + struct uld_ctx, entry); else - ctx_rr = list_next_entry(ctx_rr, entry); + drv_data.last_dev = + list_next_entry(drv_data.last_dev, entry); } - mutex_unlock(&dev_mutex); + mutex_unlock(&drv_data.drv_mutex); return u_ctx; } -static int chcr_dev_add(struct uld_ctx *u_ctx) +static void chcr_dev_add(struct uld_ctx *u_ctx) { struct chcr_dev *dev; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return -ENXIO; - - spin_lock_init(&dev->lock_chcr_dev); - u_ctx->dev = dev; - dev->u_ctx = u_ctx; - atomic_inc(&dev_count); - mutex_lock(&dev_mutex); - list_add_tail(&u_ctx->entry, &uld_ctx_list); - if (!ctx_rr) - ctx_rr = u_ctx; - 
mutex_unlock(&dev_mutex); - return 0; + dev = &u_ctx->dev; + dev->state = CHCR_ATTACH; + atomic_set(&dev->inflight, 0); + mutex_lock(&drv_data.drv_mutex); + list_move(&u_ctx->entry, &drv_data.act_dev); + if (!drv_data.last_dev) + drv_data.last_dev = u_ctx; + mutex_unlock(&drv_data.drv_mutex); } -static int chcr_dev_remove(struct uld_ctx *u_ctx) +static void chcr_dev_init(struct uld_ctx *u_ctx) { - if (ctx_rr == u_ctx) { - if (list_is_last(&ctx_rr->entry, &uld_ctx_list)) - ctx_rr = list_first_entry(&uld_ctx_list, - struct uld_ctx, - entry); + struct chcr_dev *dev; + + dev = &u_ctx->dev; + spin_lock_init(&dev->lock_chcr_dev); + INIT_DELAYED_WORK(&dev->detach_work, detach_work_fn); + init_completion(&dev->detach_comp); + dev->state = CHCR_INIT; + dev->wqretry = WQ_RETRY; + atomic_inc(&drv_data.dev_count); + atomic_set(&dev->inflight, 0); + mutex_lock(&drv_data.drv_mutex); + list_add_tail(&u_ctx->entry, &drv_data.inact_dev); + if (!drv_data.last_dev) + drv_data.last_dev = u_ctx; + mutex_unlock(&drv_data.drv_mutex); +} + +static int chcr_dev_move(struct uld_ctx *u_ctx) +{ + struct adapter *adap; + + mutex_lock(&drv_data.drv_mutex); + if (drv_data.last_dev == u_ctx) { + if (list_is_last(&drv_data.last_dev->entry, &drv_data.act_dev)) + drv_data.last_dev = list_first_entry(&drv_data.act_dev, + struct uld_ctx, entry); else - ctx_rr = list_next_entry(ctx_rr, entry); + drv_data.last_dev = + list_next_entry(drv_data.last_dev, entry); } - list_del(&u_ctx->entry); - if (list_empty(&uld_ctx_list)) - ctx_rr = NULL; - kfree(u_ctx->dev); - u_ctx->dev = NULL; - atomic_dec(&dev_count); + list_move(&u_ctx->entry, &drv_data.inact_dev); + if (list_empty(&drv_data.act_dev)) + drv_data.last_dev = NULL; + adap = padap(&u_ctx->dev); + memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats)); + atomic_dec(&drv_data.dev_count); + mutex_unlock(&drv_data.drv_mutex); + return 0; } @@ -131,12 +169,8 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev, ack_err_status = ntohl(*(__be32 *)((unsigned 
char *)&fw6_pld->data[0] + 4)); - if (ack_err_status) { - if (CHK_MAC_ERR_BIT(ack_err_status) || - CHK_PAD_ERR_BIT(ack_err_status)) - error_status = -EBADMSG; - atomic_inc(&adap->chcr_stats.error); - } + if (CHK_MAC_ERR_BIT(ack_err_status) || CHK_PAD_ERR_BIT(ack_err_status)) + error_status = -EBADMSG; /* call completion callback with failure status */ if (req) { error_status = chcr_handle_resp(req, input, error_status); @@ -144,6 +178,9 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev, pr_err("Incorrect request address from the firmware\n"); return -EFAULT; } + if (error_status) + atomic_inc(&adap->chcr_stats.error); + return 0; } @@ -167,6 +204,7 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld) goto out; } u_ctx->lldi = *lld; + chcr_dev_init(u_ctx); #ifdef CONFIG_CHELSIO_IPSEC_INLINE if (lld->crypto & ULP_CRYPTO_IPSEC_INLINE) chcr_add_xfrmops(lld); @@ -179,7 +217,7 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *pgl) { struct uld_ctx *u_ctx = (struct uld_ctx *)handle; - struct chcr_dev *dev = u_ctx->dev; + struct chcr_dev *dev = &u_ctx->dev; const struct cpl_fw6_pld *rpl = (struct cpl_fw6_pld *)rsp; if (rpl->opcode != CPL_FW6_PLD) { @@ -201,6 +239,28 @@ int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev) } #endif /* CONFIG_CHELSIO_IPSEC_INLINE */ +static void chcr_detach_device(struct uld_ctx *u_ctx) +{ + struct chcr_dev *dev = &u_ctx->dev; + + spin_lock_bh(&dev->lock_chcr_dev); + if (dev->state == CHCR_DETACH) { + spin_unlock_bh(&dev->lock_chcr_dev); + pr_debug("Detached Event received for already detach device\n"); + return; + } + dev->state = CHCR_DETACH; + spin_unlock_bh(&dev->lock_chcr_dev); + + if (atomic_read(&dev->inflight) != 0) { + schedule_delayed_work(&dev->detach_work, WQ_DETACH_TM); + wait_for_completion(&dev->detach_comp); + } + + // Move u_ctx to inactive_dev list + chcr_dev_move(u_ctx); +} + static int chcr_uld_state_change(void *handle, enum cxgb4_state state) { struct uld_ctx 
*u_ctx = handle; @@ -208,23 +268,16 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) switch (state) { case CXGB4_STATE_UP: - if (!u_ctx->dev) { - ret = chcr_dev_add(u_ctx); - if (ret != 0) - return ret; + if (u_ctx->dev.state != CHCR_INIT) { + // ALready Initialised. + return 0; } - if (atomic_read(&dev_count) == 1) - ret = start_crypto(); + chcr_dev_add(u_ctx); + ret = start_crypto(); break; case CXGB4_STATE_DETACH: - if (u_ctx->dev) { - mutex_lock(&dev_mutex); - chcr_dev_remove(u_ctx); - mutex_unlock(&dev_mutex); - } - if (!atomic_read(&dev_count)) - stop_crypto(); + chcr_detach_device(u_ctx); break; case CXGB4_STATE_START_RECOVERY: @@ -237,7 +290,13 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { + INIT_LIST_HEAD(&drv_data.act_dev); + INIT_LIST_HEAD(&drv_data.inact_dev); + atomic_set(&drv_data.dev_count, 0); + mutex_init(&drv_data.drv_mutex); + drv_data.last_dev = NULL; cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info); + return 0; } @@ -245,18 +304,20 @@ static void __exit chcr_crypto_exit(void) { struct uld_ctx *u_ctx, *tmp; - if (atomic_read(&dev_count)) - stop_crypto(); + stop_crypto(); + cxgb4_unregister_uld(CXGB4_ULD_CRYPTO); /* Remove all devices from list */ - mutex_lock(&dev_mutex); - list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { - if (u_ctx->dev) - chcr_dev_remove(u_ctx); + mutex_lock(&drv_data.drv_mutex); + list_for_each_entry_safe(u_ctx, tmp, &drv_data.act_dev, entry) { + list_del(&u_ctx->entry); kfree(u_ctx); } - mutex_unlock(&dev_mutex); - cxgb4_unregister_uld(CXGB4_ULD_CRYPTO); + list_for_each_entry_safe(u_ctx, tmp, &drv_data.inact_dev, entry) { + list_del(&u_ctx->entry); + kfree(u_ctx); + } + mutex_unlock(&drv_data.drv_mutex); } module_init(chcr_crypto_init); diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index de3a9c085daf..1159dee964ed 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ 
b/drivers/crypto/chelsio/chcr_core.h @@ -47,7 +47,7 @@ #define MAX_PENDING_REQ_TO_HW 20 #define CHCR_TEST_RESPONSE_TIMEOUT 1000 - +#define WQ_DETACH_TM (msecs_to_jiffies(50)) #define PAD_ERROR_BIT 1 #define CHK_PAD_ERR_BIT(x) (((x) >> PAD_ERROR_BIT) & 1) @@ -61,9 +61,6 @@ #define HASH_WR_MIN_LEN (sizeof(struct chcr_wr) + \ DUMMY_BYTES + \ sizeof(struct ulptx_sgl)) - -#define padap(dev) pci_get_drvdata(dev->u_ctx->lldi.pdev) - struct uld_ctx; struct _key_ctx { @@ -121,6 +118,20 @@ struct _key_ctx { #define KEYCTX_TX_WR_AUTHIN_G(x) \ (((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M) +#define WQ_RETRY 5 +struct chcr_driver_data { + struct list_head act_dev; + struct list_head inact_dev; + atomic_t dev_count; + struct mutex drv_mutex; + struct uld_ctx *last_dev; +}; + +enum chcr_state { + CHCR_INIT = 0, + CHCR_ATTACH, + CHCR_DETACH, +}; struct chcr_wr { struct fw_crypto_lookaside_wr wreq; struct ulp_txpkt ulptx; @@ -131,15 +142,18 @@ struct chcr_wr { struct chcr_dev { spinlock_t lock_chcr_dev; - struct uld_ctx *u_ctx; + enum chcr_state state; + atomic_t inflight; + int wqretry; + struct delayed_work detach_work; + struct completion detach_comp; unsigned char tx_channel_id; - unsigned char rx_channel_id; }; struct uld_ctx { struct list_head entry; struct cxgb4_lld_info lldi; - struct chcr_dev *dev; + struct chcr_dev dev; }; struct sge_opaque_hdr { @@ -159,8 +173,17 @@ struct chcr_ipsec_wr { struct chcr_ipsec_req req; }; +#define ESN_IV_INSERT_OFFSET 12 +struct chcr_ipsec_aadiv { + __be32 spi; + u8 seq_no[8]; + u8 iv[8]; +}; + struct ipsec_sa_entry { int hmac_ctrl; + u16 esn; + u16 imm; unsigned int enckey_len; unsigned int kctx_len; unsigned int authsize; @@ -181,6 +204,13 @@ static inline unsigned int sgl_len(unsigned int n) return (3 * n) / 2 + (n & 1) + 2; } +static inline void *padap(struct chcr_dev *dev) +{ + struct uld_ctx *u_ctx = container_of(dev, struct uld_ctx, dev); + + return pci_get_drvdata(u_ctx->lldi.pdev); +} + struct uld_ctx 
*assign_chcr_device(void); int chcr_send_wr(struct sk_buff *skb); int start_crypto(void); diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index d37ef41f9ebe..655606f2e4d0 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -41,7 +41,8 @@ #define CCM_B0_SIZE 16 #define CCM_AAD_FIELD_SIZE 2 -#define T6_MAX_AAD_SIZE 511 +// 511 - 16(For IV) +#define T6_MAX_AAD_SIZE 495 /* Define following if h/w is not dropping the AAD and IV data before @@ -185,9 +186,6 @@ struct chcr_aead_reqctx { dma_addr_t b0_dma; unsigned int b0_len; unsigned int op; - short int aad_nents; - short int src_nents; - short int dst_nents; u16 imm; u16 verify; u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE]; @@ -322,10 +320,8 @@ void chcr_aead_dma_unmap(struct device *dev, struct aead_request *req, unsigned short op_type); void chcr_add_aead_dst_ent(struct aead_request *req, struct cpl_rx_phys_dsgl *phys_cpl, - unsigned int assoclen, unsigned short qid); -void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx, - unsigned int assoclen); +void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx); void chcr_add_cipher_src_ent(struct ablkcipher_request *req, void *ulptx, struct cipher_wr_param *wrparam); diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c index ceaa16b8f72e..2fb48cce4462 100644 --- a/drivers/crypto/chelsio/chcr_ipsec.c +++ b/drivers/crypto/chelsio/chcr_ipsec.c @@ -76,12 +76,14 @@ static int chcr_xfrm_add_state(struct xfrm_state *x); static void chcr_xfrm_del_state(struct xfrm_state *x); static void chcr_xfrm_free_state(struct xfrm_state *x); static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static void chcr_advance_esn_state(struct xfrm_state *x); static const struct xfrmdev_ops chcr_xfrmdev_ops = { .xdo_dev_state_add = chcr_xfrm_add_state, .xdo_dev_state_delete = chcr_xfrm_del_state, 
.xdo_dev_state_free = chcr_xfrm_free_state, .xdo_dev_offload_ok = chcr_ipsec_offload_ok, + .xdo_dev_state_advance_esn = chcr_advance_esn_state, }; /* Add offload xfrms to Chelsio Interface */ @@ -210,10 +212,6 @@ static int chcr_xfrm_add_state(struct xfrm_state *x) pr_debug("CHCR: Cannot offload compressed xfrm states\n"); return -EINVAL; } - if (x->props.flags & XFRM_STATE_ESN) { - pr_debug("CHCR: Cannot offload ESN xfrm states\n"); - return -EINVAL; - } if (x->props.family != AF_INET && x->props.family != AF_INET6) { pr_debug("CHCR: Only IPv4/6 xfrm state offloaded\n"); @@ -266,6 +264,8 @@ static int chcr_xfrm_add_state(struct xfrm_state *x) } sa_entry->hmac_ctrl = chcr_ipsec_setauthsize(x, sa_entry); + if (x->props.flags & XFRM_STATE_ESN) + sa_entry->esn = 1; chcr_ipsec_setkey(x, sa_entry); x->xso.offload_handle = (unsigned long)sa_entry; try_module_get(THIS_MODULE); @@ -294,28 +294,57 @@ static void chcr_xfrm_free_state(struct xfrm_state *x) static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { - /* Offload with IP options is not supported yet */ - if (ip_hdr(skb)->ihl > 5) - return false; - + if (x->props.family == AF_INET) { + /* Offload with IP options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } return true; } -static inline int is_eth_imm(const struct sk_buff *skb, unsigned int kctx_len) +static void chcr_advance_esn_state(struct xfrm_state *x) { - int hdrlen = sizeof(struct chcr_ipsec_req) + kctx_len; + /* do nothing */ + if (!x->xso.offload_handle) + return; +} + +static inline int is_eth_imm(const struct sk_buff *skb, + struct ipsec_sa_entry *sa_entry) +{ + unsigned int kctx_len; + int hdrlen; + + kctx_len = sa_entry->kctx_len; + hdrlen = sizeof(struct fw_ulptx_wr) + + sizeof(struct chcr_ipsec_req) + kctx_len; hdrlen += sizeof(struct cpl_tx_pkt); + if (sa_entry->esn) + 
hdrlen += (DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), 16) + << 4); if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) return hdrlen; return 0; } static inline unsigned int calc_tx_sec_flits(const struct sk_buff *skb, - unsigned int kctx_len) + struct ipsec_sa_entry *sa_entry) { + unsigned int kctx_len; unsigned int flits; - int hdrlen = is_eth_imm(skb, kctx_len); + int aadivlen; + int hdrlen; + + kctx_len = sa_entry->kctx_len; + hdrlen = is_eth_imm(skb, sa_entry); + aadivlen = sa_entry->esn ? DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), + 16) : 0; + aadivlen <<= 4; /* If the skb is small enough, we can pump it out as a work request * with only immediate data. In that case we just have to have the @@ -338,13 +367,69 @@ static inline unsigned int calc_tx_sec_flits(const struct sk_buff *skb, flits += (sizeof(struct fw_ulptx_wr) + sizeof(struct chcr_ipsec_req) + kctx_len + - sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); + sizeof(struct cpl_tx_pkt_core) + + aadivlen) / sizeof(__be64); return flits; } +inline void *copy_esn_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct chcr_ipsec_aadiv *aadiv; + struct ulptx_idata *sc_imm; + struct ip_esp_hdr *esphdr; + struct xfrm_offload *xo; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + __be64 seqno; + u32 qidx; + u32 seqlo; + u8 *iv; + int eoq; + int len; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + /* end of queue, reset pos to start of queue */ + eoq = (void *)q->q.stat - pos; + if (!eoq) + pos = q->q.desc; + + len = DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), 16) << 4; + memset(pos, 0, len); + aadiv = (struct chcr_ipsec_aadiv *)pos; + esphdr = (struct ip_esp_hdr *)skb_transport_header(skb); + iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr); + xo = xfrm_offload(skb); + + aadiv->spi = (esphdr->spi); + seqlo = htonl(esphdr->seq_no); + seqno 
= cpu_to_be64(seqlo + ((u64)xo->seq.hi << 32)); + memcpy(aadiv->seq_no, &seqno, 8); + iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr); + memcpy(aadiv->iv, iv, 8); + + if (sa_entry->imm) { + sc_imm = (struct ulptx_idata *)(pos + + (DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), + sizeof(__be64)) << 3)); + sc_imm->cmd_more = FILL_CMD_MORE(!sa_entry->imm); + sc_imm->len = cpu_to_be32(sa_entry->imm); + } + pos += len; + return pos; +} + inline void *copy_cpltx_pktxt(struct sk_buff *skb, - struct net_device *dev, - void *pos) + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) { struct cpl_tx_pkt_core *cpl; struct sge_eth_txq *q; @@ -379,6 +464,9 @@ inline void *copy_cpltx_pktxt(struct sk_buff *skb, cpl->ctrl1 = cpu_to_be64(cntrl); pos += sizeof(struct cpl_tx_pkt_core); + /* Copy ESN info for HW */ + if (sa_entry->esn) + pos = copy_esn_pktxt(skb, dev, pos, sa_entry); return pos; } @@ -425,7 +513,7 @@ inline void *copy_key_cpltx_pktxt(struct sk_buff *skb, pos = (u8 *)q->q.desc + (key_len - left); } /* Copy CPL TX PKT XT */ - pos = copy_cpltx_pktxt(skb, dev, pos); + pos = copy_cpltx_pktxt(skb, dev, pos, sa_entry); return pos; } @@ -438,10 +526,16 @@ inline void *chcr_crypto_wreq(struct sk_buff *skb, { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; - unsigned int immdatalen = 0; unsigned int ivsize = GCM_ESP_IV_SIZE; struct chcr_ipsec_wr *wr; + u16 immdatalen = 0; unsigned int flits; + u32 ivinoffset; + u32 aadstart; + u32 aadstop; + u32 ciphstart; + u32 ivdrop = 0; + u32 esnlen = 0; u32 wr_mid; int qidx = skb_get_queue_mapping(skb); struct sge_eth_txq *q = &adap->sge.ethtxq[qidx + pi->first_qset]; @@ -450,10 +544,17 @@ inline void *chcr_crypto_wreq(struct sk_buff *skb, atomic_inc(&adap->chcr_stats.ipsec_cnt); - flits = calc_tx_sec_flits(skb, kctx_len); + flits = calc_tx_sec_flits(skb, sa_entry); + if (sa_entry->esn) + ivdrop = 1; - if (is_eth_imm(skb, kctx_len)) + if (is_eth_imm(skb, sa_entry)) { immdatalen = 
skb->len; + sa_entry->imm = immdatalen; + } + + if (sa_entry->esn) + esnlen = sizeof(struct chcr_ipsec_aadiv); /* WR Header */ wr = (struct chcr_ipsec_wr *)pos; @@ -478,33 +579,38 @@ inline void *chcr_crypto_wreq(struct sk_buff *skb, sizeof(wr->req.key_ctx) + kctx_len + sizeof(struct cpl_tx_pkt_core) + - immdatalen); + esnlen + + (esnlen ? 0 : immdatalen)); /* CPL_SEC_PDU */ + ivinoffset = sa_entry->esn ? (ESN_IV_INSERT_OFFSET + 1) : + (skb_transport_offset(skb) + + sizeof(struct ip_esp_hdr) + 1); wr->req.sec_cpl.op_ivinsrtofst = htonl( CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | CPL_TX_SEC_PDU_CPLLEN_V(2) | CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | CPL_TX_SEC_PDU_IVINSRTOFST_V( - (skb_transport_offset(skb) + - sizeof(struct ip_esp_hdr) + 1))); + ivinoffset)); - wr->req.sec_cpl.pldlen = htonl(skb->len); + wr->req.sec_cpl.pldlen = htonl(skb->len + esnlen); + aadstart = sa_entry->esn ? 1 : (skb_transport_offset(skb) + 1); + aadstop = sa_entry->esn ? ESN_IV_INSERT_OFFSET : + (skb_transport_offset(skb) + + sizeof(struct ip_esp_hdr)); + ciphstart = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr) + + GCM_ESP_IV_SIZE + 1; + ciphstart += sa_entry->esn ? 
esnlen : 0; wr->req.sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( - (skb_transport_offset(skb) + 1), - (skb_transport_offset(skb) + - sizeof(struct ip_esp_hdr)), - (skb_transport_offset(skb) + - sizeof(struct ip_esp_hdr) + - GCM_ESP_IV_SIZE + 1), 0); + aadstart, + aadstop, + ciphstart, 0); wr->req.sec_cpl.cipherstop_lo_authinsert = - FILL_SEC_CPL_AUTHINSERT(0, skb_transport_offset(skb) + - sizeof(struct ip_esp_hdr) + - GCM_ESP_IV_SIZE + 1, - sa_entry->authsize, - sa_entry->authsize); + FILL_SEC_CPL_AUTHINSERT(0, ciphstart, + sa_entry->authsize, + sa_entry->authsize); wr->req.sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(CHCR_ENCRYPT_OP, 1, CHCR_SCMD_CIPHER_MODE_AES_GCM, @@ -512,7 +618,7 @@ inline void *chcr_crypto_wreq(struct sk_buff *skb, sa_entry->hmac_ctrl, ivsize >> 1); wr->req.sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, - 0, 0, 0); + 0, ivdrop, 0); pos += sizeof(struct fw_ulptx_wr) + sizeof(struct ulp_txpkt) + @@ -565,7 +671,7 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) struct ipsec_sa_entry *sa_entry; u64 *pos, *end, *before, *sgl; int qidx, left, credits; - unsigned int flits = 0, ndesc, kctx_len; + unsigned int flits = 0, ndesc; struct adapter *adap; struct sge_eth_txq *q; struct port_info *pi; @@ -577,7 +683,6 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle; - kctx_len = sa_entry->kctx_len; sp = skb_sec_path(skb); if (sp->len != 1) { @@ -592,7 +697,7 @@ out_free: dev_kfree_skb_any(skb); cxgb4_reclaim_completed_tx(adap, &q->q, true); - flits = calc_tx_sec_flits(skb, sa_entry->kctx_len); + flits = calc_tx_sec_flits(skb, sa_entry); ndesc = flits_to_desc(flits); credits = txq_avail(&q->q) - ndesc; @@ -605,7 +710,7 @@ out_free: dev_kfree_skb_any(skb); return NETDEV_TX_BUSY; } - if (is_eth_imm(skb, kctx_len)) + if (is_eth_imm(skb, sa_entry)) immediate = true; if (!immediate && diff --git 
a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index eb2a0a73cbed..b4c24a35b3d0 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -261,7 +261,7 @@ static int fallback_init_cip(struct crypto_tfm *tfm) struct geode_aes_op *op = crypto_tfm_ctx(tfm); op->fallback.cip = crypto_alloc_cipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(op->fallback.cip)) { printk(KERN_ERR "Error allocating fallback algo %s\n", name); diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 3aef1d43e435..d531c14020dc 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -970,7 +970,7 @@ struct safexcel_alg_template safexcel_alg_cbc_des = { .cra_name = "cbc(des)", .cra_driver_name = "safexcel-cbc-des", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct safexcel_cipher_ctx), @@ -1010,7 +1010,7 @@ struct safexcel_alg_template safexcel_alg_ecb_des = { .cra_name = "ecb(des)", .cra_driver_name = "safexcel-ecb-des", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct safexcel_cipher_ctx), @@ -1074,7 +1074,7 @@ struct safexcel_alg_template safexcel_alg_cbc_des3_ede = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "safexcel-cbc-des3_ede", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_ctxsize = sizeof(struct safexcel_cipher_ctx), @@ -1114,7 +1114,7 @@ struct safexcel_alg_template safexcel_alg_ecb_des3_ede = { .cra_name = "ecb(des3_ede)", 
.cra_driver_name = "safexcel-ecb-des3_ede", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_ctxsize = sizeof(struct safexcel_cipher_ctx), diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 27f7dad2d45d..19fba998b86b 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -1194,7 +1194,6 @@ static struct ixp_alg ixp4xx_algos[] = { .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, - .geniv = "eseqiv", } } }, @@ -1221,7 +1220,6 @@ static struct ixp_alg ixp4xx_algos[] = { .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, .ivsize = DES3_EDE_BLOCK_SIZE, - .geniv = "eseqiv", } } }, @@ -1247,7 +1245,6 @@ static struct ixp_alg ixp4xx_algos[] = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, - .geniv = "eseqiv", } } }, @@ -1273,7 +1270,6 @@ static struct ixp_alg ixp4xx_algos[] = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, - .geniv = "eseqiv", } } }, @@ -1287,7 +1283,6 @@ static struct ixp_alg ixp4xx_algos[] = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, - .geniv = "eseqiv", .setkey = ablk_rfc3686_setkey, .encrypt = ablk_rfc3686_crypt, .decrypt = ablk_rfc3686_crypt } diff --git a/drivers/crypto/mxc-scc.c b/drivers/crypto/mxc-scc.c index e01c46387df8..519086730791 100644 --- a/drivers/crypto/mxc-scc.c +++ b/drivers/crypto/mxc-scc.c @@ -178,12 +178,12 @@ static int mxc_scc_get_data(struct mxc_scc_ctx *ctx, else from = scc->black_memory; - dev_dbg(scc->dev, "pcopy: from 0x%p %d bytes\n", from, + dev_dbg(scc->dev, "pcopy: from 0x%p %zu bytes\n", from, ctx->dst_nents * 8); len = sg_pcopy_from_buffer(ablkreq->dst, ctx->dst_nents, from, ctx->size, ctx->offset); if (!len) { - dev_err(scc->dev, 
"pcopy err from 0x%p (len=%d)\n", from, len); + dev_err(scc->dev, "pcopy err from 0x%p (len=%zu)\n", from, len); return -EINVAL; } @@ -274,7 +274,7 @@ static int mxc_scc_put_data(struct mxc_scc_ctx *ctx, len = sg_pcopy_to_buffer(req->src, ctx->src_nents, to, len, ctx->offset); if (!len) { - dev_err(scc->dev, "pcopy err to 0x%p (len=%d)\n", to, len); + dev_err(scc->dev, "pcopy err to 0x%p (len=%zu)\n", to, len); return -EINVAL; } @@ -335,9 +335,9 @@ static void mxc_scc_ablkcipher_next(struct mxc_scc_ctx *ctx, return; } - dev_dbg(scc->dev, "Start encryption (0x%p/0x%p)\n", - (void *)readl(scc->base + SCC_SCM_RED_START), - (void *)readl(scc->base + SCC_SCM_BLACK_START)); + dev_dbg(scc->dev, "Start encryption (0x%x/0x%x)\n", + readl(scc->base + SCC_SCM_RED_START), + readl(scc->base + SCC_SCM_BLACK_START)); /* clear interrupt control registers */ writel(SCC_SCM_INTR_CTRL_CLR_INTR, diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 4e6ff32f8a7e..a2105cf33abb 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -82,6 +83,7 @@ struct dcp { spinlock_t lock[DCP_MAX_CHANS]; struct task_struct *thread[DCP_MAX_CHANS]; struct crypto_queue queue[DCP_MAX_CHANS]; + struct clk *dcp_clk; }; enum dcp_chan { @@ -1053,11 +1055,24 @@ static int mxs_dcp_probe(struct platform_device *pdev) /* Re-align the structure so it fits the DCP constraints. */ sdcp->coh = PTR_ALIGN(sdcp->coh, DCP_ALIGNMENT); - /* Restart the DCP block. */ - ret = stmp_reset_block(sdcp->base); + /* DCP clock is optional, only used on some SOCs */ + sdcp->dcp_clk = devm_clk_get(dev, "dcp"); + if (IS_ERR(sdcp->dcp_clk)) { + if (sdcp->dcp_clk != ERR_PTR(-ENOENT)) + return PTR_ERR(sdcp->dcp_clk); + sdcp->dcp_clk = NULL; + } + ret = clk_prepare_enable(sdcp->dcp_clk); if (ret) return ret; + /* Restart the DCP block. 
*/ + ret = stmp_reset_block(sdcp->base); + if (ret) { + dev_err(dev, "Failed reset\n"); + goto err_disable_unprepare_clk; + } + /* Initialize control register. */ writel(MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES | MXS_DCP_CTRL_ENABLE_CONTEXT_CACHING | 0xf, @@ -1094,7 +1109,8 @@ static int mxs_dcp_probe(struct platform_device *pdev) NULL, "mxs_dcp_chan/sha"); if (IS_ERR(sdcp->thread[DCP_CHAN_HASH_SHA])) { dev_err(dev, "Error starting SHA thread!\n"); - return PTR_ERR(sdcp->thread[DCP_CHAN_HASH_SHA]); + ret = PTR_ERR(sdcp->thread[DCP_CHAN_HASH_SHA]); + goto err_disable_unprepare_clk; } sdcp->thread[DCP_CHAN_CRYPTO] = kthread_run(dcp_chan_thread_aes, @@ -1151,6 +1167,10 @@ err_destroy_aes_thread: err_destroy_sha_thread: kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]); + +err_disable_unprepare_clk: + clk_disable_unprepare(sdcp->dcp_clk); + return ret; } @@ -1170,6 +1190,8 @@ static int mxs_dcp_remove(struct platform_device *pdev) kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]); kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]); + clk_disable_unprepare(sdcp->dcp_clk); + platform_set_drvdata(pdev, NULL); global_sdcp = NULL; diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 898c0a280511..5a26fcd75d2d 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -159,7 +159,6 @@ struct crypto_alg nx_ctr3686_aes_alg = { .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, .ivsize = CTR_RFC3686_IV_SIZE, - .geniv = "seqiv", .setkey = ctr3686_aes_nx_set_key, .encrypt = ctr3686_aes_nx_crypt, .decrypt = ctr3686_aes_nx_crypt, diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index a553ffddb11b..0120feb2d746 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -749,7 +749,6 @@ static struct crypto_alg algs_ctr[] = { .cra_u.ablkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .geniv = "eseqiv", .ivsize = 
AES_BLOCK_SIZE, .setkey = omap_aes_setkey, .encrypt = omap_aes_ctr_encrypt, @@ -1222,7 +1221,6 @@ static int omap_aes_probe(struct platform_device *pdev) algp = &dd->pdata->algs_info[i].algs_list[j]; pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); err = crypto_register_alg(algp); if (err) @@ -1240,7 +1238,6 @@ static int omap_aes_probe(struct platform_device *pdev) algp = &aalg->base; pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); err = crypto_register_aead(aalg); if (err) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index eb95b0d7f184..6369019219d4 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -1069,7 +1069,6 @@ static int omap_des_probe(struct platform_device *pdev) algp = &dd->pdata->algs_info[i].algs_list[j]; pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); err = crypto_register_alg(algp); if (err) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index a28f1d18fe01..17068b55fea5 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1585,8 +1585,7 @@ static struct spacc_alg l2_engine_algs[] = { .cra_name = "f8(kasumi)", .cra_driver_name = "f8-kasumi-picoxcell", .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_GIVCIPHER | - CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = 8, .cra_ctxsize = sizeof(struct spacc_ablk_ctx), diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index 585e1cab9ae3..25c13e26d012 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -376,7 +376,6 @@ static int qce_ablkcipher_register_one(const struct qce_ablkcipher_def *def, alg->cra_module = THIS_MODULE; alg->cra_init = qce_ablkcipher_init; alg->cra_exit = qce_ablkcipher_exit; - INIT_LIST_HEAD(&alg->cra_list); INIT_LIST_HEAD(&tmpl->entry); 
tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_ABLKCIPHER; diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index d8a5db11b7ea..fc45f5ea6fdd 100644 --- a/drivers/crypto/qce/sha.c +++ b/drivers/crypto/qce/sha.c @@ -508,7 +508,6 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def, base->cra_alignmask = 0; base->cra_module = THIS_MODULE; base->cra_init = qce_ahash_cra_init; - INIT_LIST_HEAD(&base->cra_list); snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index bbf166a97ad3..8c32a3059b4a 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -1321,7 +1321,6 @@ static int sahara_register_algs(struct sahara_dev *dev) unsigned int i, j, k, l; for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { - INIT_LIST_HEAD(&aes_algs[i].cra_list); err = crypto_register_alg(&aes_algs[i]); if (err) goto err_aes_algs; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 6988012deca4..45e20707cef8 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -3155,7 +3155,6 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, alg->cra_ablkcipher.setkey = ablkcipher_setkey; alg->cra_ablkcipher.encrypt = ablkcipher_encrypt; alg->cra_ablkcipher.decrypt = ablkcipher_decrypt; - alg->cra_ablkcipher.geniv = "eseqiv"; break; case CRYPTO_ALG_TYPE_AEAD: alg = &t_alg->algt.alg.aead.base; diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index d2663a4e1f5e..a92a66b1ff46 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -556,7 +556,7 @@ static int cryp_set_dma_transfer(struct cryp_ctx *ctx, desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg_src, ctx->device->dma.sg_src_len, - direction, DMA_CTRL_ACK); + DMA_MEM_TO_DEV, DMA_CTRL_ACK); break; case DMA_FROM_DEVICE: @@ -580,7 +580,7 @@ 
static int cryp_set_dma_transfer(struct cryp_ctx *ctx, desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg_dst, ctx->device->dma.sg_dst_len, - direction, + DMA_DEV_TO_MEM, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 633321a8dd03..a0bb8a6eec3f 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -166,7 +166,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, __func__); desc = dmaengine_prep_slave_sg(channel, ctx->device->dma.sg, ctx->device->dma.sg_len, - direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); + DMA_MEM_TO_DEV, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); if (!desc) { dev_err(ctx->device->dev, "%s: dmaengine_prep_slave_sg() failed!\n", __func__); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b8eec515a003..a7195eb5b8d8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -377,7 +377,7 @@ static struct crypto_cipher *alloc_essiv_cipher(struct crypt_config *cc, int err; /* Setup the essiv_tfm with the given salt */ - essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); + essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, 0); if (IS_ERR(essiv_tfm)) { ti->error = "Error allocating crypto tfm for ESSIV"; return essiv_tfm; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index bb3096bf2cc6..d4ad0bfee251 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2804,7 +2804,7 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, int r; if (a->alg_string) { - *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC); + *hash = crypto_alloc_shash(a->alg_string, 0, 0); if (IS_ERR(*hash)) { *error = error_alg; r = PTR_ERR(*hash); diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 5512c7f73fce..3f5a14112c6b 100644 --- a/drivers/net/wireless/cisco/airo.c +++ 
b/drivers/net/wireless/cisco/airo.c @@ -1359,7 +1359,7 @@ static int micsetup(struct airo_info *ai) { int i; if (ai->tfm == NULL) - ai->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + ai->tfm = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(ai->tfm)) { airo_print_err(ai->dev->name, "failed to load transform for AES"); diff --git a/drivers/net/wireless/intersil/orinoco/mic.c b/drivers/net/wireless/intersil/orinoco/mic.c index 08bc7822f820..709d9ab3e7bc 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.c +++ b/drivers/net/wireless/intersil/orinoco/mic.c @@ -16,8 +16,7 @@ /********************************************************************/ int orinoco_mic_init(struct orinoco_private *priv) { - priv->tx_tfm_mic = crypto_alloc_shash("michael_mic", 0, - CRYPTO_ALG_ASYNC); + priv->tx_tfm_mic = crypto_alloc_shash("michael_mic", 0, 0); if (IS_ERR(priv->tx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " "crypto API michael_mic\n"); @@ -25,8 +24,7 @@ int orinoco_mic_init(struct orinoco_private *priv) return -ENOMEM; } - priv->rx_tfm_mic = crypto_alloc_shash("michael_mic", 0, - CRYPTO_ALG_ASYNC); + priv->rx_tfm_mic = crypto_alloc_shash("michael_mic", 0, 0); if (IS_ERR(priv->rx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " "crypto API michael_mic\n"); diff --git a/drivers/staging/rtl8192e/rtllib_crypt_ccmp.c b/drivers/staging/rtl8192e/rtllib_crypt_ccmp.c index bc45cf098b04..91871503364d 100644 --- a/drivers/staging/rtl8192e/rtllib_crypt_ccmp.c +++ b/drivers/staging/rtl8192e/rtllib_crypt_ccmp.c @@ -67,7 +67,7 @@ static void *rtllib_ccmp_init(int key_idx) goto fail; priv->key_idx = key_idx; - priv->tfm = (void *)crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + priv->tfm = (void *)crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(priv->tfm)) { pr_debug("Could not allocate crypto API aes\n"); priv->tfm = NULL; diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c index 041f1b123888..3534ddb900d1 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c @@ -71,7 +71,7 @@ static void *ieee80211_ccmp_init(int key_idx) goto fail; priv->key_idx = key_idx; - priv->tfm = (void *)crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + priv->tfm = (void *)crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(priv->tfm)) { pr_debug("ieee80211_crypt_ccmp: could not allocate crypto API aes\n"); priv->tfm = NULL; diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 68ddee86a886..edb7263bff40 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -316,7 +316,7 @@ ssize_t wusb_prf(void *out, size_t out_size, goto error_setkey_cbc; } - tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + tfm_aes = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(tfm_aes)) { result = PTR_ERR(tfm_aes); printk(KERN_ERR "E: can't load AES: %d\n", (int)result); diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index 124e965a28b3..5bf5fd08879e 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -269,8 +269,7 @@ int ubifs_init_authentication(struct ubifs_info *c) goto out; } - c->hash_tfm = crypto_alloc_shash(c->auth_hash_name, 0, - CRYPTO_ALG_ASYNC); + c->hash_tfm = crypto_alloc_shash(c->auth_hash_name, 0, 0); if (IS_ERR(c->hash_tfm)) { err = PTR_ERR(c->hash_tfm); ubifs_err(c, "Can not allocate %s: %d", @@ -286,7 +285,7 @@ int ubifs_init_authentication(struct ubifs_info *c) goto out_free_hash; } - c->hmac_tfm = crypto_alloc_shash(hmac_name, 0, CRYPTO_ALG_ASYNC); + c->hmac_tfm = crypto_alloc_shash(hmac_name, 0, 0); if (IS_ERR(c->hmac_tfm)) { err = PTR_ERR(c->hmac_tfm); ubifs_err(c, "Can not allocate %s: %d", hmac_name, err); diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 22e6f412c595..a3e766dff917 100644 --- a/include/crypto/acompress.h +++ 
b/include/crypto/acompress.h @@ -234,34 +234,6 @@ static inline void acomp_request_set_params(struct acomp_req *req, req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT; } -static inline void crypto_stat_compress(struct acomp_req *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->compress_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->compress_cnt); - atomic64_add(req->slen, &tfm->base.__crt_alg->compress_tlen); - } -#endif -} - -static inline void crypto_stat_decompress(struct acomp_req *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->compress_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->decompress_cnt); - atomic64_add(req->slen, &tfm->base.__crt_alg->decompress_tlen); - } -#endif -} - /** * crypto_acomp_compress() -- Invoke asynchronous compress operation * @@ -274,10 +246,13 @@ static inline void crypto_stat_decompress(struct acomp_req *req, int ret) static inline int crypto_acomp_compress(struct acomp_req *req) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int slen = req->slen; int ret; + crypto_stats_get(alg); ret = tfm->compress(req); - crypto_stat_compress(req, ret); + crypto_stats_compress(slen, ret, alg); return ret; } @@ -293,10 +268,13 @@ static inline int crypto_acomp_compress(struct acomp_req *req) static inline int crypto_acomp_decompress(struct acomp_req *req) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int slen = req->slen; int ret; + crypto_stats_get(alg); ret = tfm->decompress(req); - crypto_stat_decompress(req, ret); + crypto_stats_decompress(slen, ret, alg); return ret; } diff --git a/include/crypto/aead.h 
b/include/crypto/aead.h index 0d765d7bfb82..9ad595f97c65 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -115,7 +115,6 @@ struct aead_request { * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg - * @geniv: see struct skcipher_alg * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function @@ -142,8 +141,6 @@ struct aead_alg { int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); - const char *geniv; - unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; @@ -306,34 +303,6 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) return __crypto_aead_cast(req->base.tfm); } -static inline void crypto_stat_aead_encrypt(struct aead_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->aead_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->encrypt_cnt); - atomic64_add(req->cryptlen, &tfm->base.__crt_alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->aead_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->decrypt_cnt); - atomic64_add(req->cryptlen, &tfm->base.__crt_alg->decrypt_tlen); - } -#endif -} - /** * crypto_aead_encrypt() - encrypt plaintext * @req: reference to the aead_request handle that holds all information @@ -356,13 +325,16 @@ static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret) static inline int crypto_aead_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_alg *alg = 
aead->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = crypto_aead_alg(aead)->encrypt(req); - crypto_stat_aead_encrypt(req, ret); + crypto_stats_aead_encrypt(cryptlen, alg, ret); return ret; } @@ -391,15 +363,18 @@ static inline int crypto_aead_encrypt(struct aead_request *req) static inline int crypto_aead_decrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_alg *alg = aead->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else if (req->cryptlen < crypto_aead_authsize(aead)) ret = -EINVAL; else ret = crypto_aead_alg(aead)->decrypt(req); - crypto_stat_aead_decrypt(req, ret); + crypto_stats_aead_decrypt(cryptlen, alg, ret); return ret; } diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index afac71119396..2d690494568c 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -271,62 +271,6 @@ static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm) return alg->max_size(tfm); } -static inline void crypto_stat_akcipher_encrypt(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->encrypt_cnt); - atomic64_add(req->src_len, &tfm->base.__crt_alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_akcipher_decrypt(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - } else { - 
atomic_inc(&tfm->base.__crt_alg->decrypt_cnt); - atomic64_add(req->src_len, &tfm->base.__crt_alg->decrypt_tlen); - } -#endif -} - -static inline void crypto_stat_akcipher_sign(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - else - atomic_inc(&tfm->base.__crt_alg->sign_cnt); -#endif -} - -static inline void crypto_stat_akcipher_verify(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - else - atomic_inc(&tfm->base.__crt_alg->verify_cnt); -#endif -} - /** * crypto_akcipher_encrypt() - Invoke public key encrypt operation * @@ -341,10 +285,13 @@ static inline int crypto_akcipher_encrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; + unsigned int src_len = req->src_len; int ret; + crypto_stats_get(calg); ret = alg->encrypt(req); - crypto_stat_akcipher_encrypt(req, ret); + crypto_stats_akcipher_encrypt(src_len, ret, calg); return ret; } @@ -362,10 +309,13 @@ static inline int crypto_akcipher_decrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; + unsigned int src_len = req->src_len; int ret; + crypto_stats_get(calg); ret = alg->decrypt(req); - crypto_stat_akcipher_decrypt(req, ret); + crypto_stats_akcipher_decrypt(src_len, ret, calg); return ret; } @@ -383,10 +333,12 @@ static inline int crypto_akcipher_sign(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct 
akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->sign(req); - crypto_stat_akcipher_sign(req, ret); + crypto_stats_akcipher_sign(ret, calg); return ret; } @@ -404,10 +356,12 @@ static inline int crypto_akcipher_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->verify(req); - crypto_stat_akcipher_verify(req, ret); + crypto_stats_akcipher_verify(ret, calg); return ret; } diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h new file mode 100644 index 000000000000..1fc70a69d550 --- /dev/null +++ b/include/crypto/chacha.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values and helper functions for the ChaCha and XChaCha stream ciphers. + * + * XChaCha extends ChaCha's nonce to 192 bits, while provably retaining ChaCha's + * security. Here they share the same key size, tfm context, and setkey + * function; only their IV size and encrypt/decrypt function differ. + * + * The ChaCha paper specifies 20, 12, and 8-round variants. In general, it is + * recommended to use the 20-round variant ChaCha20. However, the other + * variants can be needed in some performance-sensitive scenarios. The generic + * ChaCha code currently allows only the 20 and 12-round variants. 
+ */ + +#ifndef _CRYPTO_CHACHA_H +#define _CRYPTO_CHACHA_H + +#include <crypto/skcipher.h> +#include <linux/types.h> +#include <linux/crypto.h> + +/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ +#define CHACHA_IV_SIZE 16 + +#define CHACHA_KEY_SIZE 32 +#define CHACHA_BLOCK_SIZE 64 +#define CHACHAPOLY_IV_SIZE 12 + +/* 192-bit nonce, then 64-bit stream position */ +#define XCHACHA_IV_SIZE 32 + +struct chacha_ctx { + u32 key[8]; + int nrounds; +}; + +void chacha_block(u32 *state, u8 *stream, int nrounds); +static inline void chacha20_block(u32 *state, u8 *stream) +{ + chacha_block(state, stream, 20); +} +void hchacha_block(const u32 *in, u32 *out, int nrounds); + +void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv); + +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); + +int crypto_chacha_crypt(struct skcipher_request *req); +int crypto_xchacha_crypt(struct skcipher_request *req); + +#endif /* _CRYPTO_CHACHA_H */ diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h deleted file mode 100644 index f76302d99e2b..000000000000 --- a/include/crypto/chacha20.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Common values for the ChaCha20 algorithm - */ - -#ifndef _CRYPTO_CHACHA20_H -#define _CRYPTO_CHACHA20_H - -#include <crypto/skcipher.h> -#include <linux/types.h> -#include <linux/crypto.h> - -#define CHACHA20_IV_SIZE 16 -#define CHACHA20_KEY_SIZE 32 -#define CHACHA20_BLOCK_SIZE 64 - -struct chacha20_ctx { - u32 key[8]; -}; - -void chacha20_block(u32 *state, u8 *stream); -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize); -int crypto_chacha20_crypt(struct skcipher_request *req); - -#endif diff --git a/include/crypto/hash.h b/include/crypto/hash.h index bc7796600338..3b31c1b349ae 100644 --- a/include/crypto/hash.h +++ 
b/include/crypto/hash.h @@ -412,32 +412,6 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); -static inline void crypto_stat_ahash_update(struct ahash_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->hash_err_cnt); - else - atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen); -#endif -} - -static inline void crypto_stat_ahash_final(struct ahash_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->hash_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->hash_cnt); - atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen); - } -#endif -} - /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information @@ -552,10 +526,14 @@ static inline int crypto_ahash_init(struct ahash_request *req) */ static inline int crypto_ahash_update(struct ahash_request *req) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crypto_ahash_reqtfm(req)->update(req); - crypto_stat_ahash_update(req, ret); + crypto_stats_ahash_update(nbytes, ret, alg); return ret; } diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h index 56f217d41f12..91786b68dbdb 100644 --- a/include/crypto/hash_info.h +++ b/include/crypto/hash_info.h @@ -15,6 +15,7 @@ #include #include +#include #include diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h index 8db299c25566..40623f4457df 100644 --- a/include/crypto/internal/cryptouser.h +++ 
b/include/crypto/internal/cryptouser.h @@ -3,6 +3,12 @@ struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact); -int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb); +#ifdef CONFIG_CRYPTO_STATS int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs); -int crypto_dump_reportstat_done(struct netlink_callback *cb); +#else +/* Stub for builds without CONFIG_CRYPTO_STATS: always reports -ENOTSUPP. */ +static inline int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs) +{ + return -ENOTSUPP; +} +#endif diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index e42f7063f245..453e867b4bd9 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -70,8 +70,6 @@ struct skcipher_walk { unsigned int alignmask; }; -extern const struct crypto_type crypto_givcipher_type; - static inline struct crypto_instance *skcipher_crypto_instance( struct skcipher_instance *inst) { diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index f517ba6d3a27..1a97e1601422 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -268,42 +268,6 @@ struct kpp_secret { unsigned short len; }; -static inline void crypto_stat_kpp_set_secret(struct crypto_kpp *tfm, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret) - atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt); - else - atomic_inc(&tfm->base.__crt_alg->setsecret_cnt); -#endif -} - -static inline void crypto_stat_kpp_generate_public_key(struct kpp_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - - if (ret) - atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt); - else - atomic_inc(&tfm->base.__crt_alg->generate_public_key_cnt); -#endif -} - -static inline void crypto_stat_kpp_compute_shared_secret(struct kpp_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - - if (ret) - atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt); - else - 
atomic_inc(&tfm->base.__crt_alg->compute_shared_secret_cnt); -#endif -} - /** * crypto_kpp_set_secret() - Invoke kpp operation * @@ -323,10 +287,12 @@ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, const void *buffer, unsigned int len) { struct kpp_alg *alg = crypto_kpp_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->set_secret(tfm, buffer, len); - crypto_stat_kpp_set_secret(tfm, ret); + crypto_stats_kpp_set_secret(calg, ret); return ret; } @@ -347,10 +313,12 @@ static inline int crypto_kpp_generate_public_key(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct kpp_alg *alg = crypto_kpp_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->generate_public_key(req); - crypto_stat_kpp_generate_public_key(req, ret); + crypto_stats_kpp_generate_public_key(calg, ret); return ret; } @@ -368,10 +336,12 @@ static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct kpp_alg *alg = crypto_kpp_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->compute_shared_secret(req); - crypto_stat_kpp_compute_shared_secret(req, ret); + crypto_stats_kpp_compute_shared_secret(calg, ret); return ret; } diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h new file mode 100644 index 000000000000..53c04423c582 --- /dev/null +++ b/include/crypto/nhpoly1305.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values and helper functions for the NHPoly1305 hash function. 
+ */ + +#ifndef _NHPOLY1305_H +#define _NHPOLY1305_H + +#include <crypto/hash.h> +#include <crypto/poly1305.h> + +/* NH parameterization: */ + +/* Endianness: little */ +/* Word size: 32 bits (works well on NEON, SSE2, AVX2) */ + +/* Stride: 2 words (optimal on ARM32 NEON; works okay on other CPUs too) */ +#define NH_PAIR_STRIDE 2 +#define NH_MESSAGE_UNIT (NH_PAIR_STRIDE * 2 * sizeof(u32)) + +/* Num passes (Toeplitz iteration count): 4, to give ε = 2^{-128} */ +#define NH_NUM_PASSES 4 +#define NH_HASH_BYTES (NH_NUM_PASSES * sizeof(u64)) + +/* Max message size: 1024 bytes (32x compression factor) */ +#define NH_NUM_STRIDES 64 +#define NH_MESSAGE_WORDS (NH_PAIR_STRIDE * 2 * NH_NUM_STRIDES) +#define NH_MESSAGE_BYTES (NH_MESSAGE_WORDS * sizeof(u32)) +#define NH_KEY_WORDS (NH_MESSAGE_WORDS + \ + NH_PAIR_STRIDE * 2 * (NH_NUM_PASSES - 1)) +#define NH_KEY_BYTES (NH_KEY_WORDS * sizeof(u32)) + +#define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES) + +struct nhpoly1305_key { + struct poly1305_key poly_key; + u32 nh_key[NH_KEY_WORDS]; +}; + +struct nhpoly1305_state { + + /* Running total of polynomial evaluation */ + struct poly1305_state poly_state; + + /* Partial block buffer */ + u8 buffer[NH_MESSAGE_UNIT]; + unsigned int buflen; + + /* + * Number of bytes remaining until the current NH message reaches + * NH_MESSAGE_BYTES. When nonzero, 'nh_hash' holds the partial NH hash. 
+ */ + unsigned int nh_remaining; + + __le64 nh_hash[NH_NUM_PASSES]; +}; + +typedef void (*nh_t)(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]); + +int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen); + +int crypto_nhpoly1305_init(struct shash_desc *desc); +int crypto_nhpoly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen); +int crypto_nhpoly1305_update_helper(struct shash_desc *desc, + const u8 *src, unsigned int srclen, + nh_t nh_fn); +int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst); +int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, + nh_t nh_fn); + +#endif /* _NHPOLY1305_H */ diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index f718a19da82f..34317ed2071e 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -13,13 +13,21 @@ #define POLY1305_KEY_SIZE 32 #define POLY1305_DIGEST_SIZE 16 +struct poly1305_key { + u32 r[5]; /* key, base 2^26 */ +}; + +struct poly1305_state { + u32 h[5]; /* accumulator, base 2^26 */ +}; + struct poly1305_desc_ctx { /* key */ - u32 r[5]; + struct poly1305_key r; /* finalize key */ u32 s[4]; /* accumulator */ - u32 h[5]; + struct poly1305_state h; /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; /* bytes used in partial buffer */ @@ -30,6 +38,22 @@ struct poly1305_desc_ctx { bool sset; }; +/* + * Poly1305 core functions. These implement the ε-almost-∆-universal hash + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce + * ("s key") at the end. They also only support block-aligned inputs. 
+ */ +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); +static inline void poly1305_core_init(struct poly1305_state *state) +{ + memset(state->h, 0, sizeof(state->h)); +} +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks); +void poly1305_core_emit(const struct poly1305_state *state, void *dst); + +/* Crypto API helper functions for the Poly1305 MAC */ int crypto_poly1305_init(struct shash_desc *desc); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen); diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 6d258f5b68f1..022a1b896b47 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -122,29 +122,6 @@ static inline void crypto_free_rng(struct crypto_rng *tfm) crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); } -static inline void crypto_stat_rng_seed(struct crypto_rng *tfm, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->rng_err_cnt); - else - atomic_inc(&tfm->base.__crt_alg->seed_cnt); -#endif -} - -static inline void crypto_stat_rng_generate(struct crypto_rng *tfm, - unsigned int dlen, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->rng_err_cnt); - } else { - atomic_inc(&tfm->base.__crt_alg->generate_cnt); - atomic64_add(dlen, &tfm->base.__crt_alg->generate_tlen); - } -#endif -} - /** * crypto_rng_generate() - get random number * @tfm: cipher handle @@ -163,10 +140,12 @@ static inline int crypto_rng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { + struct crypto_alg *alg = tfm->base.__crt_alg; int ret; + crypto_stats_get(alg); ret = crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); - crypto_stat_rng_generate(tfm, dlen, ret); + crypto_stats_rng_generate(alg, dlen, ret); return 
ret; } diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 925f547cdcfa..e555294ed77f 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -39,19 +39,6 @@ struct skcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -/** - * struct skcipher_givcrypt_request - Crypto request with IV generation - * @seq: Sequence number for IV generation - * @giv: Space for generated IV - * @creq: The crypto request itself - */ -struct skcipher_givcrypt_request { - u64 seq; - u8 *giv; - - struct ablkcipher_request creq; -}; - struct crypto_skcipher { int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); @@ -486,32 +473,6 @@ static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm( return container_of(tfm, struct crypto_sync_skcipher, base); } -static inline void crypto_stat_skcipher_encrypt(struct skcipher_request *req, - int ret, struct crypto_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&alg->cipher_err_cnt); - } else { - atomic_inc(&alg->encrypt_cnt); - atomic64_add(req->cryptlen, &alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req, - int ret, struct crypto_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&alg->cipher_err_cnt); - } else { - atomic_inc(&alg->decrypt_cnt); - atomic64_add(req->cryptlen, &alg->decrypt_tlen); - } -#endif -} - /** * crypto_skcipher_encrypt() - encrypt plaintext * @req: reference to the skcipher_request handle that holds all information @@ -526,13 +487,16 @@ static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req, static inline int crypto_skcipher_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if 
(crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = tfm->encrypt(req); - crypto_stat_skcipher_encrypt(req, ret, tfm->base.__crt_alg); + crypto_stats_skcipher_encrypt(cryptlen, ret, alg); return ret; } @@ -550,13 +514,16 @@ static inline int crypto_skcipher_encrypt(struct skcipher_request *req) static inline int crypto_skcipher_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = tfm->decrypt(req); - crypto_stat_skcipher_decrypt(req, ret, tfm->base.__crt_alg); + crypto_stats_skcipher_decrypt(cryptlen, ret, alg); return ret; } diff --git a/include/crypto/streebog.h b/include/crypto/streebog.h new file mode 100644 index 000000000000..4af119f7e07b --- /dev/null +++ b/include/crypto/streebog.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-2-Clause */ +/* + * Copyright (c) 2013 Alexey Degtyarev + * Copyright (c) 2018 Vitaly Chikunov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ */ + +#ifndef _CRYPTO_STREEBOG_H_ +#define _CRYPTO_STREEBOG_H_ + +#include <linux/types.h> + +#define STREEBOG256_DIGEST_SIZE 32 +#define STREEBOG512_DIGEST_SIZE 64 +#define STREEBOG_BLOCK_SIZE 64 + +struct streebog_uint512 { + u64 qword[8]; +}; + +struct streebog_state { + u8 buffer[STREEBOG_BLOCK_SIZE]; + struct streebog_uint512 hash; + struct streebog_uint512 h; + struct streebog_uint512 N; + struct streebog_uint512 Sigma; + size_t fillsize; +}; + +#endif /* !_CRYPTO_STREEBOG_H_ */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3634ad6fe202..902ec171fc6d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -49,7 +49,6 @@ #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 -#define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_KPP 0x00000008 #define CRYPTO_ALG_TYPE_ACOMPRESS 0x0000000a #define CRYPTO_ALG_TYPE_SCOMPRESS 0x0000000b @@ -76,12 +75,6 @@ */ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 -/* - * This bit is set for symmetric key ciphers that have already been wrapped - * with a generic IV generator to prevent them from being wrapped again. - */ -#define CRYPTO_ALG_GENIV 0x00000200 - /* * Set if the algorithm has passed automated run-time testing. Note that * if there is no run-time testing for a given algorithm it is considered @@ -157,7 +150,6 @@ struct crypto_async_request; struct crypto_blkcipher; struct crypto_tfm; struct crypto_type; -struct skcipher_givcrypt_request; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); @@ -246,31 +238,16 @@ struct cipher_desc { * be called in parallel with the same transformation object. * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt * and the conditions are exactly the same. - * @givencrypt: Update the IV for encryption. With this function, a cipher - * implementation may provide the function on how to update the IV - * for encryption. 
- * @givdecrypt: Update the IV for decryption. This is the reverse of - * @givencrypt . - * @geniv: The transformation implementation may use an "IV generator" provided - * by the kernel crypto API. Several use cases have a predefined - * approach how IVs are to be updated. For such use cases, the kernel - * crypto API provides ready-to-use implementations that can be - * referenced with this variable. * @ivsize: IV size applicable for transformation. The consumer must provide an * IV of exactly that size to perform the encrypt or decrypt operation. * - * All fields except @givencrypt , @givdecrypt , @geniv and @ivsize are - * mandatory and must be filled. + * All fields except @ivsize are mandatory and must be filled. */ struct ablkcipher_alg { int (*setkey)(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct ablkcipher_request *req); int (*decrypt)(struct ablkcipher_request *req); - int (*givencrypt)(struct skcipher_givcrypt_request *req); - int (*givdecrypt)(struct skcipher_givcrypt_request *req); - - const char *geniv; unsigned int min_keysize; unsigned int max_keysize; @@ -284,10 +261,9 @@ struct ablkcipher_alg { * @setkey: see struct ablkcipher_alg * @encrypt: see struct ablkcipher_alg * @decrypt: see struct ablkcipher_alg - * @geniv: see struct ablkcipher_alg * @ivsize: see struct ablkcipher_alg * - * All fields except @geniv and @ivsize are mandatory and must be filled. + * All fields except @ivsize are mandatory and must be filled. 
*/ struct blkcipher_alg { int (*setkey)(struct crypto_tfm *tfm, const u8 *key, @@ -299,8 +275,6 @@ struct blkcipher_alg { struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes); - const char *geniv; - unsigned int min_keysize; unsigned int max_keysize; unsigned int ivsize; @@ -369,6 +343,115 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; +#ifdef CONFIG_CRYPTO_STATS +/* + * struct crypto_istat_aead - statistics for AEAD algorithm + * @encrypt_cnt: number of encrypt requests + * @encrypt_tlen: total data size handled by encrypt requests + * @decrypt_cnt: number of decrypt requests + * @decrypt_tlen: total data size handled by decrypt requests + * @err_cnt: number of error for AEAD requests + */ +struct crypto_istat_aead { + atomic64_t encrypt_cnt; + atomic64_t encrypt_tlen; + atomic64_t decrypt_cnt; + atomic64_t decrypt_tlen; + atomic64_t err_cnt; +}; + +/* + * struct crypto_istat_akcipher - statistics for akcipher algorithm + * @encrypt_cnt: number of encrypt requests + * @encrypt_tlen: total data size handled by encrypt requests + * @decrypt_cnt: number of decrypt requests + * @decrypt_tlen: total data size handled by decrypt requests + * @verify_cnt: number of verify operation + * @sign_cnt: number of sign requests + * @err_cnt: number of error for akcipher requests + */ +struct crypto_istat_akcipher { + atomic64_t encrypt_cnt; + atomic64_t encrypt_tlen; + atomic64_t decrypt_cnt; + atomic64_t decrypt_tlen; + atomic64_t verify_cnt; + atomic64_t sign_cnt; + atomic64_t err_cnt; +}; + +/* + * struct crypto_istat_cipher - statistics for cipher algorithm + * @encrypt_cnt: number of encrypt requests + * @encrypt_tlen: total data size handled by encrypt requests + * @decrypt_cnt: number of decrypt requests + * @decrypt_tlen: total data size handled by decrypt requests + * @err_cnt: number of error for cipher requests + */ +struct crypto_istat_cipher { + atomic64_t encrypt_cnt; + atomic64_t encrypt_tlen; + atomic64_t 
decrypt_cnt; + atomic64_t decrypt_tlen; + atomic64_t err_cnt; +}; + +/* + * struct crypto_istat_compress - statistics for compress algorithm + * @compress_cnt: number of compress requests + * @compress_tlen: total data size handled by compress requests + * @decompress_cnt: number of decompress requests + * @decompress_tlen: total data size handled by decompress requests + * @err_cnt: number of error for compress requests + */ +struct crypto_istat_compress { + atomic64_t compress_cnt; + atomic64_t compress_tlen; + atomic64_t decompress_cnt; + atomic64_t decompress_tlen; + atomic64_t err_cnt; +}; + +/* + * struct crypto_istat_hash - statistics for hash algorithm + * @hash_cnt: number of hash requests + * @hash_tlen: total data size hashed + * @err_cnt: number of error for hash requests + */ +struct crypto_istat_hash { + atomic64_t hash_cnt; + atomic64_t hash_tlen; + atomic64_t err_cnt; +}; + +/* + * struct crypto_istat_kpp - statistics for KPP algorithm + * @setsecret_cnt: number of setsecret operation + * @generate_public_key_cnt: number of generate_public_key operation + * @compute_shared_secret_cnt: number of compute_shared_secret operation + * @err_cnt: number of error for KPP requests + */ +struct crypto_istat_kpp { + atomic64_t setsecret_cnt; + atomic64_t generate_public_key_cnt; + atomic64_t compute_shared_secret_cnt; + atomic64_t err_cnt; +}; + +/* + * struct crypto_istat_rng - statistics for RNG algorithm + * @generate_cnt: number of RNG generate requests + * @generate_tlen: total data size of generated data by the RNG + * @seed_cnt: number of times the RNG was seeded + * @err_cnt: number of error for RNG requests + */ +struct crypto_istat_rng { + atomic64_t generate_cnt; + atomic64_t generate_tlen; + atomic64_t seed_cnt; + atomic64_t err_cnt; +}; +#endif /* CONFIG_CRYPTO_STATS */ #define cra_ablkcipher cra_u.ablkcipher #define cra_blkcipher cra_u.blkcipher @@ -454,32 +537,14 @@ struct compress_alg { * @cra_refcnt: internally used * @cra_destroy: internally 
used * - * All following statistics are for this crypto_alg - * @encrypt_cnt: number of encrypt requests - * @decrypt_cnt: number of decrypt requests - * @compress_cnt: number of compress requests - * @decompress_cnt: number of decompress requests - * @generate_cnt: number of RNG generate requests - * @seed_cnt: number of times the rng was seeded - * @hash_cnt: number of hash requests - * @sign_cnt: number of sign requests - * @setsecret_cnt: number of setsecrey operation - * @generate_public_key_cnt: number of generate_public_key operation - * @verify_cnt: number of verify operation - * @compute_shared_secret_cnt: number of compute_shared_secret operation - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @compress_tlen: total data size handled by compress requests - * @decompress_tlen: total data size handled by decompress requests - * @generate_tlen: total data size of generated data by the RNG - * @hash_tlen: total data size hashed - * @akcipher_err_cnt: number of error for akcipher requests - * @cipher_err_cnt: number of error for akcipher requests - * @compress_err_cnt: number of error for akcipher requests - * @aead_err_cnt: number of error for akcipher requests - * @hash_err_cnt: number of error for akcipher requests - * @rng_err_cnt: number of error for akcipher requests - * @kpp_err_cnt: number of error for akcipher requests + * @stats: union of all possible crypto_istat_xxx structures + * @stats.aead: statistics for AEAD algorithm + * @stats.akcipher: statistics for akcipher algorithm + * @stats.cipher: statistics for cipher algorithm + * @stats.compress: statistics for compress algorithm + * @stats.hash: statistics for hash algorithm + * @stats.rng: statistics for rng algorithm + * @stats.kpp: statistics for KPP algorithm * * The struct crypto_alg describes a generic Crypto API algorithm and is common * for all of the transformations. 
Any variable not documented here shall not @@ -515,46 +580,86 @@ struct crypto_alg { struct module *cra_module; +#ifdef CONFIG_CRYPTO_STATS union { - atomic_t encrypt_cnt; - atomic_t compress_cnt; - atomic_t generate_cnt; - atomic_t hash_cnt; - atomic_t setsecret_cnt; - }; - union { - atomic64_t encrypt_tlen; - atomic64_t compress_tlen; - atomic64_t generate_tlen; - atomic64_t hash_tlen; - }; - union { - atomic_t akcipher_err_cnt; - atomic_t cipher_err_cnt; - atomic_t compress_err_cnt; - atomic_t aead_err_cnt; - atomic_t hash_err_cnt; - atomic_t rng_err_cnt; - atomic_t kpp_err_cnt; - }; - union { - atomic_t decrypt_cnt; - atomic_t decompress_cnt; - atomic_t seed_cnt; - atomic_t generate_public_key_cnt; - }; - union { - atomic64_t decrypt_tlen; - atomic64_t decompress_tlen; - }; - union { - atomic_t verify_cnt; - atomic_t compute_shared_secret_cnt; - }; - atomic_t sign_cnt; + struct crypto_istat_aead aead; + struct crypto_istat_akcipher akcipher; + struct crypto_istat_cipher cipher; + struct crypto_istat_compress compress; + struct crypto_istat_hash hash; + struct crypto_istat_rng rng; + struct crypto_istat_kpp kpp; + } stats; +#endif /* CONFIG_CRYPTO_STATS */ } CRYPTO_MINALIGN_ATTR; +#ifdef CONFIG_CRYPTO_STATS +void crypto_stats_init(struct crypto_alg *alg); +void crypto_stats_get(struct crypto_alg *alg); +void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); +void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); +void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg); +void 
crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg); +void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg); +void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg); +void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret); +void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret); +void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret); +void crypto_stats_rng_seed(struct crypto_alg *alg, int ret); +void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret); +void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); +void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); +#else +static inline void crypto_stats_init(struct crypto_alg *alg) +{} +static inline void crypto_stats_get(struct crypto_alg *alg) +{} +static inline void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_sign(int ret, 
struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_rng_seed(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret) +{} +static inline void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) +{} +#endif /* * A helper struct for waiting for completion of async crypto ops */ @@ -800,14 +905,14 @@ static inline struct crypto_ablkcipher *__crypto_ablkcipher_cast( static inline u32 crypto_skcipher_type(u32 type) { - type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); + type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_BLKCIPHER; return type; } static inline u32 crypto_skcipher_mask(u32 mask) { - mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); + mask &= ~CRYPTO_ALG_TYPE_MASK; mask |= CRYPTO_ALG_TYPE_BLKCIPHER_MASK; return mask; } @@ -973,38 +1078,6 @@ static inline struct crypto_ablkcipher *crypto_ablkcipher_reqtfm( return __crypto_ablkcipher_cast(req->base.tfm); } -static inline void crypto_stat_ablkcipher_encrypt(struct ablkcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt); 
- } else { - atomic_inc(&crt->base->base.__crt_alg->encrypt_cnt); - atomic64_add(req->nbytes, &crt->base->base.__crt_alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_ablkcipher_decrypt(struct ablkcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt); - } else { - atomic_inc(&crt->base->base.__crt_alg->decrypt_cnt); - atomic64_add(req->nbytes, &crt->base->base.__crt_alg->decrypt_tlen); - } -#endif -} - /** * crypto_ablkcipher_encrypt() - encrypt plaintext * @req: reference to the ablkcipher_request handle that holds all information @@ -1020,10 +1093,13 @@ static inline int crypto_ablkcipher_encrypt(struct ablkcipher_request *req) { struct ablkcipher_tfm *crt = crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); + struct crypto_alg *alg = crt->base->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crt->encrypt(req); - crypto_stat_ablkcipher_encrypt(req, ret); + crypto_stats_ablkcipher_encrypt(nbytes, ret, alg); return ret; } @@ -1042,10 +1118,13 @@ static inline int crypto_ablkcipher_decrypt(struct ablkcipher_request *req) { struct ablkcipher_tfm *crt = crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); + struct crypto_alg *alg = crt->base->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crt->decrypt(req); - crypto_stat_ablkcipher_decrypt(req, ret); + crypto_stats_ablkcipher_decrypt(nbytes, ret, alg); return ret; } diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 6dafbc3e4414..4dc1603919ce 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -76,45 +76,69 @@ struct crypto_user_alg { __u32 cru_flags; }; -struct crypto_stat { +struct crypto_stat_aead { + char type[CRYPTO_MAX_NAME]; + 
__u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_err_cnt; +}; + +struct crypto_stat_akcipher { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_verify_cnt; + __u64 stat_sign_cnt; + __u64 stat_err_cnt; +}; + +struct crypto_stat_cipher { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_err_cnt; +}; + +struct crypto_stat_compress { + char type[CRYPTO_MAX_NAME]; + __u64 stat_compress_cnt; + __u64 stat_compress_tlen; + __u64 stat_decompress_cnt; + __u64 stat_decompress_tlen; + __u64 stat_err_cnt; +}; + +struct crypto_stat_hash { + char type[CRYPTO_MAX_NAME]; + __u64 stat_hash_cnt; + __u64 stat_hash_tlen; + __u64 stat_err_cnt; +}; + +struct crypto_stat_kpp { + char type[CRYPTO_MAX_NAME]; + __u64 stat_setsecret_cnt; + __u64 stat_generate_public_key_cnt; + __u64 stat_compute_shared_secret_cnt; + __u64 stat_err_cnt; +}; + +struct crypto_stat_rng { + char type[CRYPTO_MAX_NAME]; + __u64 stat_generate_cnt; + __u64 stat_generate_tlen; + __u64 stat_seed_cnt; + __u64 stat_err_cnt; +}; + +struct crypto_stat_larval { char type[CRYPTO_MAX_NAME]; - union { - __u32 stat_encrypt_cnt; - __u32 stat_compress_cnt; - __u32 stat_generate_cnt; - __u32 stat_hash_cnt; - __u32 stat_setsecret_cnt; - }; - union { - __u64 stat_encrypt_tlen; - __u64 stat_compress_tlen; - __u64 stat_generate_tlen; - __u64 stat_hash_tlen; - }; - union { - __u32 stat_akcipher_err_cnt; - __u32 stat_cipher_err_cnt; - __u32 stat_compress_err_cnt; - __u32 stat_aead_err_cnt; - __u32 stat_hash_err_cnt; - __u32 stat_rng_err_cnt; - __u32 stat_kpp_err_cnt; - }; - union { - __u32 stat_decrypt_cnt; - __u32 stat_decompress_cnt; - __u32 stat_seed_cnt; - __u32 stat_generate_public_key_cnt; - }; - union { - __u64 stat_decrypt_tlen; - __u64 stat_decompress_tlen; - }; - 
union { - __u32 stat_verify_cnt; - __u32 stat_compute_shared_secret_cnt; - }; - __u32 stat_sign_cnt; }; struct crypto_report_larval { diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h index eea5d02c58de..74a8609fcb4d 100644 --- a/include/uapi/linux/hash_info.h +++ b/include/uapi/linux/hash_info.h @@ -33,6 +33,8 @@ enum hash_algo { HASH_ALGO_TGR_160, HASH_ALGO_TGR_192, HASH_ALGO_SM3_256, + HASH_ALGO_STREEBOG_256, + HASH_ALGO_STREEBOG_512, HASH_ALGO__LAST }; diff --git a/kernel/padata.c b/kernel/padata.c index d568cc56405f..3e2633ae3bca 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -720,7 +720,7 @@ int padata_start(struct padata_instance *pinst) if (pinst->flags & PADATA_INVALID) err = -EINVAL; - __padata_start(pinst); + __padata_start(pinst); mutex_unlock(&pinst->lock); diff --git a/lib/Makefile b/lib/Makefile index f5262d30bfe6..e1b59da71418 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,7 +20,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ - sha1.o chacha20.o irq_regs.o argv_split.o \ + sha1.o chacha.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ diff --git a/lib/chacha20.c b/lib/chacha.c similarity index 58% rename from lib/chacha20.c rename to lib/chacha.c index d907fec6a9ed..a46d2832dbab 100644 --- a/lib/chacha20.c +++ b/lib/chacha.c @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539 + * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) * * Copyright (C) 2015 Martin Willi * @@ -14,17 +14,16 @@ #include #include #include -#include +#include -void chacha20_block(u32 *state, u8 *stream) +static void chacha_permute(u32 *x, int nrounds) { - u32 x[16]; int i; - for (i = 0; i < ARRAY_SIZE(x); i++) - x[i] = state[i]; + /* whitelist the 
allowed round counts */ + WARN_ON_ONCE(nrounds != 20 && nrounds != 12); - for (i = 0; i < 20; i += 2) { + for (i = 0; i < nrounds; i += 2) { x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); @@ -65,10 +64,54 @@ void chacha20_block(u32 *state, u8 *stream) x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } +} + +/** + * chacha_block - generate one keystream block and increment block counter + * @state: input state matrix (16 32-bit words) + * @stream: output keystream block (64 bytes) + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. + * The caller has already converted the endianness of the input. This function + * also handles incrementing the block counter in the input matrix. + */ +void chacha_block(u32 *state, u8 *stream, int nrounds) +{ + u32 x[16]; + int i; + + memcpy(x, state, 64); + + chacha_permute(x, nrounds); for (i = 0; i < ARRAY_SIZE(x); i++) put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); state[12]++; } -EXPORT_SYMBOL(chacha20_block); +EXPORT_SYMBOL(chacha_block); + +/** + * hchacha_block - abbreviated ChaCha core, for XChaCha + * @in: input state matrix (16 32-bit words) + * @out: output (8 32-bit words) + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step + * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha + * skips the final addition of the initial state, and outputs only certain words + * of the state. It should not be used for streaming directly. 
+ */ +void hchacha_block(const u32 *in, u32 *out, int nrounds) +{ + u32 x[16]; + + memcpy(x, in, 64); + + chacha_permute(x, nrounds); + + memcpy(&out[0], &x[0], 16); + memcpy(&out[4], &x[12], 16); +} +EXPORT_SYMBOL(hchacha_block); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c822e626761b..621146d04c03 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1390,7 +1390,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) if (!smp) return NULL; - smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + smp->tfm_aes = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(smp->tfm_aes)) { BT_ERR("Unable to create AES crypto context"); goto zfree_smp; @@ -3233,7 +3233,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) if (!smp) return ERR_PTR(-ENOMEM); - tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + tfm_aes = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(tfm_aes)) { BT_ERR("Unable to create AES crypto context"); kzfree(smp); @@ -3906,13 +3906,13 @@ int __init bt_selftest_smp(void) struct crypto_kpp *tfm_ecdh; int err; - tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + tfm_aes = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(tfm_aes)) { BT_ERR("Unable to create AES crypto context"); return PTR_ERR(tfm_aes); } - tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(tfm_cmac)) { BT_ERR("Unable to create CMAC crypto context"); crypto_free_cipher(tfm_aes); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 73e8f347802e..bfe9ed9f4c48 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -30,13 +30,13 @@ int ieee80211_wep_init(struct ieee80211_local *local) /* start WEP IV from a random value */ get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN); - local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); + local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, 0); if 
(IS_ERR(local->wep_tx_tfm)) { local->wep_rx_tfm = ERR_PTR(-EINVAL); return PTR_ERR(local->wep_tx_tfm); } - local->wep_rx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); + local->wep_rx_tfm = crypto_alloc_cipher("arc4", 0, 0); if (IS_ERR(local->wep_rx_tfm)) { crypto_free_cipher(local->wep_tx_tfm); local->wep_tx_tfm = ERR_PTR(-EINVAL); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 6beab0cfcb99..55214fe925b2 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -75,7 +75,7 @@ static void *lib80211_ccmp_init(int key_idx) goto fail; priv->key_idx = key_idx; - priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + priv->tfm = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(priv->tfm)) { priv->tfm = NULL; goto fail; diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index b5e235573c8a..35f06563207d 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -99,7 +99,7 @@ static void *lib80211_tkip_init(int key_idx) priv->key_idx = key_idx; - priv->tx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); + priv->tx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, 0); if (IS_ERR(priv->tx_tfm_arc4)) { priv->tx_tfm_arc4 = NULL; goto fail; @@ -111,7 +111,7 @@ static void *lib80211_tkip_init(int key_idx) goto fail; } - priv->rx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); + priv->rx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, 0); if (IS_ERR(priv->rx_tfm_arc4)) { priv->rx_tfm_arc4 = NULL; goto fail; diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index 6015f6b542a6..20c1ad63ad44 100644 --- a/net/wireless/lib80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -48,13 +48,13 @@ static void *lib80211_wep_init(int keyidx) goto fail; priv->key_idx = keyidx; - priv->tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); + priv->tx_tfm = crypto_alloc_cipher("arc4", 
0, 0); if (IS_ERR(priv->tx_tfm)) { priv->tx_tfm = NULL; goto fail; } - priv->rx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); + priv->rx_tfm = crypto_alloc_cipher("arc4", 0, 0); if (IS_ERR(priv->rx_tfm)) { priv->rx_tfm = NULL; goto fail; diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index 136f2a047836..af03d98c7552 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -112,7 +112,7 @@ static int __init init_profile_hash(void) if (!apparmor_initialized) return 0; - tfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_shash("sha1", 0, 0); if (IS_ERR(tfm)) { int error = PTR_ERR(tfm); AA_ERROR("failed to setup profile sha1 hashing: %d\n", error); diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 77ef210a8a6b..43e2dc3a60d0 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -97,8 +97,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) mutex_lock(&mutex); if (*tfm) goto out; - *tfm = crypto_alloc_shash(algo, 0, - CRYPTO_ALG_ASYNC | CRYPTO_NOLOAD); + *tfm = crypto_alloc_shash(algo, 0, CRYPTO_NOLOAD); if (IS_ERR(*tfm)) { rc = PTR_ERR(*tfm); pr_err("Can not allocate %s (reason: %ld)\n", algo, rc); diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index d92cbf9687c3..a3891ae9fa0f 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -342,7 +342,7 @@ static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen, struct crypto_shash *tfm; int err; - tfm = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_shash(hmac_alg, 0, 0); if (IS_ERR(tfm)) { pr_err("encrypted_key: can't alloc %s transform: %ld\n", hmac_alg, PTR_ERR(tfm)); @@ -984,7 +984,7 @@ static int __init init_encrypted(void) { int ret; - hash_tfm = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC); + hash_tfm = 
crypto_alloc_shash(hash_alg, 0, 0); if (IS_ERR(hash_tfm)) { pr_err("encrypted_key: can't allocate %s transform: %ld\n", hash_alg, PTR_ERR(hash_tfm)); diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 697bfc6c8192..4d98f4f87236 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1199,14 +1199,14 @@ static int __init trusted_shash_alloc(void) { int ret; - hmacalg = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC); + hmacalg = crypto_alloc_shash(hmac_alg, 0, 0); if (IS_ERR(hmacalg)) { pr_info("trusted_key: could not allocate crypto %s\n", hmac_alg); return PTR_ERR(hmacalg); } - hashalg = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC); + hashalg = crypto_alloc_shash(hash_alg, 0, 0); if (IS_ERR(hashalg)) { pr_info("trusted_key: could not allocate crypto %s\n", hash_alg); diff --git a/tools/crypto/getstat.c b/tools/crypto/getstat.c index 24115173a483..9e8ff76420fa 100644 --- a/tools/crypto/getstat.c +++ b/tools/crypto/getstat.c @@ -152,86 +152,86 @@ static int get_stat(const char *drivername) if (tb[CRYPTOCFGA_STAT_HASH]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_HASH]; - struct crypto_stat *rhash = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tHash\n\tHash: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_hash *rhash = + (struct crypto_stat_hash *)RTA_DATA(rta); + printf("%s\tHash\n\tHash: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rhash->stat_hash_cnt, rhash->stat_hash_tlen, - rhash->stat_hash_err_cnt); + rhash->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_COMPRESS]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_COMPRESS]; - struct crypto_stat *rblk = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tCompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_compress *rblk = + (struct crypto_stat_compress *)RTA_DATA(rta); + printf("%s\tCompress\n\tCompress: %llu bytes: %llu\n\tDecompress: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rblk->stat_compress_cnt, 
rblk->stat_compress_tlen, rblk->stat_decompress_cnt, rblk->stat_decompress_tlen, - rblk->stat_compress_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_ACOMP]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_ACOMP]; - struct crypto_stat *rcomp = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tACompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_compress *rcomp = + (struct crypto_stat_compress *)RTA_DATA(rta); + printf("%s\tACompress\n\tCompress: %llu bytes: %llu\n\tDecompress: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rcomp->stat_compress_cnt, rcomp->stat_compress_tlen, rcomp->stat_decompress_cnt, rcomp->stat_decompress_tlen, - rcomp->stat_compress_err_cnt); + rcomp->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_AEAD]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_AEAD]; - struct crypto_stat *raead = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tAEAD\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_aead *raead = + (struct crypto_stat_aead *)RTA_DATA(rta); + printf("%s\tAEAD\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tErrors: %llu\n", drivername, raead->stat_encrypt_cnt, raead->stat_encrypt_tlen, raead->stat_decrypt_cnt, raead->stat_decrypt_tlen, - raead->stat_aead_err_cnt); + raead->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_BLKCIPHER]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_BLKCIPHER]; - struct crypto_stat *rblk = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tCipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_cipher *rblk = + (struct crypto_stat_cipher *)RTA_DATA(rta); + printf("%s\tCipher\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen, rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen, - rblk->stat_cipher_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_AKCIPHER]) { 
struct rtattr *rta = tb[CRYPTOCFGA_STAT_AKCIPHER]; - struct crypto_stat *rblk = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tAkcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tSign: %u\n\tVerify: %u\n\tErrors: %u\n", + struct crypto_stat_akcipher *rblk = + (struct crypto_stat_akcipher *)RTA_DATA(rta); + printf("%s\tAkcipher\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tSign: %llu\n\tVerify: %llu\n\tErrors: %llu\n", drivername, rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen, rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen, rblk->stat_sign_cnt, rblk->stat_verify_cnt, - rblk->stat_akcipher_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_CIPHER]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_CIPHER]; - struct crypto_stat *rblk = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_cipher *rblk = + (struct crypto_stat_cipher *)RTA_DATA(rta); + printf("%s\tcipher\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen, rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen, - rblk->stat_cipher_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_RNG]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_RNG]; - struct crypto_stat *rrng = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tRNG\n\tSeed: %u\n\tGenerate: %u bytes: %llu\n\tErrors: %u\n", + struct crypto_stat_rng *rrng = + (struct crypto_stat_rng *)RTA_DATA(rta); + printf("%s\tRNG\n\tSeed: %llu\n\tGenerate: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rrng->stat_seed_cnt, rrng->stat_generate_cnt, rrng->stat_generate_tlen, - rrng->stat_rng_err_cnt); + rrng->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_KPP]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_KPP]; - struct crypto_stat *rkpp = - (struct crypto_stat *)RTA_DATA(rta); - printf("%s\tKPP\n\tSetsecret: %u\n\tGenerate public key: 
%u\n\tCompute_shared_secret: %u\n\tErrors: %u\n", + struct crypto_stat_kpp *rkpp = + (struct crypto_stat_kpp *)RTA_DATA(rta); + printf("%s\tKPP\n\tSetsecret: %llu\n\tGenerate public key: %llu\n\tCompute_shared_secret: %llu\n\tErrors: %llu\n", drivername, rkpp->stat_setsecret_cnt, rkpp->stat_generate_public_key_cnt, rkpp->stat_compute_shared_secret_cnt, - rkpp->stat_kpp_err_cnt); + rkpp->stat_err_cnt); } else { fprintf(stderr, "%s is of an unknown algorithm\n", drivername); }